Example #1
def linear(args, output_size, bias, bias_start=0.0, init_constant_bias=False,
           initializer=None, scope=None, dtype=tf.float32):
  """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

  Args:
    args: a 2D Tensor or a list of 2D, batch x n, Tensors.
    output_size: int, second dimension of W[i].
    bias: boolean, whether to add a bias term or not.
    bias_start: starting value to initialize the bias; 0 by default.
    init_constant_bias: boolean. If False, the variable scope initializer will
      be used to initialize the bias parameter. If True, the bias parameter
      will be initialized to the constant value defined by bias_start.
    initializer: initializer for the weight matrix; defaults to the variable
      scope initializer.
    scope: VariableScope for the created subgraph; defaults to "Linear".
    dtype: the dtype of the created variables; defaults to tf.float32.

  Returns:
      A 2D Tensor with shape [batch x output_size] equal to
      sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

  Raises:
    ValueError: if some of the arguments have unspecified or wrong shape.
  """
  if args is None or (nest.is_sequence(args) and not args):
    raise ValueError("`args` must be specified")
  if not nest.is_sequence(args):
    args = [args]

  # Calculate the total size of arguments on dimension 1.
  total_arg_size = 0
  shapes = [a.get_shape().as_list() for a in args]
  for shape in shapes:
    if len(shape) != 2:
      raise ValueError("Linear is expecting 2D arguments: {0}".format(
        str(shapes)))
    if not shape[1]:
      raise ValueError("Linear expects shape[1] to be known for arguments: "
                       "{0}".format(str(shapes)))
    else:
      total_arg_size += shape[1]

  # Now the computation.
  with tf.variable_scope(scope or "Linear"):
    matrix = tf.get_variable("Matrix", [total_arg_size, output_size],
                             dtype=dtype, initializer=initializer)
    if len(args) == 1:
      res = tf.matmul(args[0], matrix)
    else:
      res = tf.matmul(tf.concat(axis=1, values=args), matrix)
    if not bias:
      return res

    if init_constant_bias:
      init_bias = tf.constant_initializer(bias_start)
    else:
      init_bias = initializer
    bias_term = tf.get_variable("Bias", [output_size], dtype=dtype,
                                initializer=init_bias)
  return res + bias_term
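
A minimal usage sketch for the helper above, assuming TensorFlow 1.x graph mode and the usual `from tensorflow.python.util import nest` import; the placeholder shapes are illustrative only:

x1 = tf.placeholder(tf.float32, [None, 128])  # batch x 128
x2 = tf.placeholder(tf.float32, [None, 64])   # batch x 64
# The inputs are concatenated along dim 1, so W has shape [128 + 64, 256].
y = linear([x1, x2], output_size=256, bias=True, scope="proj")
print(y.get_shape())  # (?, 256)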
Example #2
  def __init__(self, cells, state_is_tuple=True):
    """Create a RNN cell composed sequentially of a number of RNNCells.

    Args:
      cells: list of RNNCells that will be composed in this order.
      state_is_tuple: If True, accepted and returned states are n-tuples, where
        `n = len(cells)`.  If False, the states are all
        concatenated along the column axis.  This latter behavior will soon be
        deprecated.

    Raises:
      ValueError: if cells is empty (not allowed), or at least one of the cells
        returns a state tuple but the flag `state_is_tuple` is `False`.
    """
    super(MultiRNNCell, self).__init__()
    if not cells:
      raise ValueError("Must specify at least one cell for MultiRNNCell.")
    if not nest.is_sequence(cells):
      raise TypeError(
          "cells must be a list or tuple, but saw: %s." % cells)

    self._cells = cells
    self._state_is_tuple = state_is_tuple
    if not state_is_tuple:
      if any(nest.is_sequence(c.state_size) for c in self._cells):
        raise ValueError("Some cells return tuples of states, but the flag "
                         "state_is_tuple is not set.  State sizes are: %s"
                         % str([c.state_size for c in self._cells]))
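
A construction sketch, assuming the TF 1.x `tf.nn.rnn_cell` API: with `state_is_tuple=True` the constructor accepts cells whose state_size is itself a tuple (such as LSTM cells); with `state_is_tuple=False` the same cells would trigger the ValueError above.

cells = [tf.nn.rnn_cell.BasicLSTMCell(256) for _ in range(2)]
stacked = MultiRNNCell(cells, state_is_tuple=True)
# stacked.state_size is a 2-tuple of LSTMStateTuple(c=256, h=256) entries.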
Example #3
def _linear(args, output_size, bias, bias_initializer=None,
            kernel_initializer=None):
  """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

  Args:
    args: a 2D Tensor or a list of 2D, batch x n, Tensors.
    output_size: int, second dimension of W[i].
    bias: boolean, whether to add a bias term or not.
    bias_initializer: starting value to initialize the bias; None by default.
    kernel_initializer: starting value to initialize the weight; None by default.

  Returns:
    A 2D Tensor with shape [batch x output_size] equal to
    sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

  Raises:
    ValueError: if some of the arguments have unspecified or wrong shape.
  """
  if args is None or (nest.is_sequence(args) and not args):
    raise ValueError("`args` must be specified")
  if not nest.is_sequence(args):
    args = [args]

  # Calculate the total size of arguments on dimension 1.
  total_arg_size = 0
  shapes = [a.get_shape() for a in args]
  for shape in shapes:
    if shape.ndims != 2:
      raise ValueError("linear is expecting 2D arguments: %s" % shapes)
    if shape[1].value is None:
      raise ValueError("linear expects shape[1] to be provided for shape %s, "
                       "but saw %s" % (shape, shape[1]))
    else:
      total_arg_size += shape[1].value

  dtype = [a.dtype for a in args][0]

  # Now the computation.
  scope = vs.get_variable_scope()
  with vs.variable_scope(scope) as outer_scope:
    weights = vs.get_variable(
        _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size], dtype=dtype,
        initializer=kernel_initializer)
    if len(args) == 1:
      res = math_ops.matmul(args[0], weights)
    else:
      res = math_ops.matmul(array_ops.concat(args, 1), weights)
    if not bias:
      return res
    with vs.variable_scope(outer_scope) as inner_scope:
      inner_scope.set_partitioner(None)
      if bias_initializer is None:
        bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype)
      biases = vs.get_variable(
          _BIAS_VARIABLE_NAME, [output_size],
          dtype=dtype,
          initializer=bias_initializer)
    return nn_ops.bias_add(res, biases)
Example #4
 def resetstate(self):
     if nest.is_sequence(self.initial_state):
         if nest.is_sequence(self.initial_state[0]):
             state = tuple(tuple(is2.eval() for is2 in ist) for ist in self.initial_state)
         else:
             state = tuple(ist.eval() for ist in self.initial_state)
     else:
         state = self.initial_state.eval()
     return state
Example #5
def sum_logits(args, mask=None, name=None):
    with tf.name_scope(name or "sum_logits"):
        if args is None or (nest.is_sequence(args) and not args):
            raise ValueError("`args` must be specified")
        if not nest.is_sequence(args):
            args = [args]
        rank = len(args[0].get_shape())
        logits = sum(tf.reduce_sum(arg, rank-1) for arg in args)
        if mask is not None:
            logits = exp_mask(logits, mask)
        return logits
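
An illustrative call; `exp_mask` is assumed (as elsewhere in this codebase) to push masked-out positions toward a large negative value:

a = tf.placeholder(tf.float32, [None, 10, 50])
b = tf.placeholder(tf.float32, [None, 10, 50])
m = tf.placeholder(tf.bool, [None, 10])
# Each arg is reduce-summed over its last axis, then the results are added,
# giving logits of shape [None, 10].
logits = sum_logits([a, b], mask=m)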
Example #6
def linear(args, output_size, bias, bias_start=0.0, scope=None):
  """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

  Args:
    args: a 2D Tensor or a list of 2D, batch x n, Tensors.
    output_size: int, second dimension of W[i].
    bias: boolean, whether to add a bias term or not.
    bias_start: starting value to initialize the bias; 0 by default.
    scope: (optional) Variable scope to create parameters in.

  Returns:
    A 2D Tensor with shape [batch x output_size] equal to
    sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

  Raises:
    ValueError: if some of the arguments have unspecified or wrong shape.
  """
  if args is None or (nest.is_sequence(args) and not args):
    raise ValueError("`args` must be specified")
  if not nest.is_sequence(args):
    args = [args]

  # Calculate the total size of arguments on dimension 1.
  total_arg_size = 0
  shapes = [a.get_shape() for a in args]
  for shape in shapes:
    if shape.ndims != 2:
      raise ValueError("linear is expecting 2D arguments: %s" % shapes)
    if shape[1].value is None:
      raise ValueError("linear expects shape[1] to be provided for shape %s, "
                       "but saw %s" % (shape, shape[1]))
    else:
      total_arg_size += shape[1].value

  dtype = [a.dtype for a in args][0]

  # Now the computation.
  with tf.variable_scope(scope or "linear") as outer_scope:
    weights = tf.get_variable(
        "weights", [total_arg_size, output_size], dtype=dtype)
    if len(args) == 1:
      res = tf.matmul(args[0], weights)
    else:
      res = tf.matmul(tf.concat(args, 1), weights)
    if not bias:
      return res
    with tf.variable_scope(outer_scope) as inner_scope:
      inner_scope.set_partitioner(None)
      biases = tf.get_variable(
          "biases", [output_size],
          dtype=dtype,
          initializer=tf.constant_initializer(bias_start, dtype=dtype))
  return tf.nn.bias_add(res, biases)
Example #7
def _linear(args, output_size, bias, bias_start=0.0, weights_init=None,
            trainable=True, restore=True, reuse=False, scope=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    Arguments:
        args: a 2D Tensor or a list of 2D, batch x n, Tensors.
        output_size: int, second dimension of W[i].
        bias: boolean, whether to add a bias term or not.
        bias_start: starting value to initialize the bias; 0 by default.
        scope: VariableScope for the created subgraph; defaults to "Linear".

    Returns:
        A 2D Tensor with shape [batch x output_size] equal to
        sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

    Raises:
        ValueError: if some of the arguments have unspecified or wrong shape.
    """
    if args is None or (is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError(
                "Linear is expecting 2D arguments: %s" % str(shapes))
        if not shape[1]:
            raise ValueError(
                "Linear expects shape[1] of arguments: %s" % str(shapes))
        else:
            total_arg_size += shape[1]

    # Now the computation.
    with tf.variable_scope(scope or "Linear", reuse=reuse):
        matrix = va.variable("Matrix", [total_arg_size, output_size],
                             initializer=weights_init, trainable=trainable,
                             restore=restore)
        if len(args) == 1:
            res = tf.matmul(args[0], matrix)
        else:
            res = tf.matmul(array_ops.concat(1, args), matrix)
        if not bias:
            return res
        bias_term = va.variable(
            "Bias", [output_size],
            initializer=tf.constant_initializer(bias_start),
            trainable=trainable, restore=restore)
    return res + bias_term
Example #8
def linear(args, output_size, bias, bias_start=0.0, scope=None, squeeze=False, keep_prob=None, is_train=None):
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("args must be specified")
    if not nest.is_sequence(args):
        args = [args]
    flat_args = [flatten(arg, 1) for arg in args]
    if keep_prob is not None and is_train is not None:
        flat_args = [tf.cond(is_train, lambda: tf.nn.dropout(arg, keep_prob), lambda: arg) for arg in flat_args]
    with tf.variable_scope(scope or 'linear'):
        flat_out = _linear(flat_args, output_size, bias, bias_initializer=tf.constant_initializer(bias_start))
    out = reconstruct(flat_out, args[0], 1)
    if squeeze:
        out = tf.squeeze(out, [len(args[0].get_shape().as_list())-1])
    return out
Example #9
 def testIsSequence(self):
   self.assertFalse(nest.is_sequence("1234"))
   self.assertTrue(nest.is_sequence([1, 3, [4, 5]]))
   self.assertTrue(nest.is_sequence(((7, 8), (5, 6))))
   self.assertTrue(nest.is_sequence([]))
   self.assertTrue(nest.is_sequence({"a": 1, "b": 2}))
   self.assertFalse(nest.is_sequence(set([1, 2])))
   ones = array_ops.ones([2, 3])
   self.assertFalse(nest.is_sequence(ones))
   self.assertFalse(nest.is_sequence(math_ops.tanh(ones)))
   self.assertFalse(nest.is_sequence(np.ones((4, 5))))
Example #10
  def __call__(self, inputs, state, scope=None):
    """Run the cell with bottom layer's attention copied to all upper layers."""
    if not nest.is_sequence(state):
      raise ValueError(
          "Expected state to be a tuple of length %d, but received: %s"
          % (len(self.state_size), state))

    with tf.variable_scope(scope or "multi_rnn_cell"):
      new_states = []

      with tf.variable_scope("cell_0_attention"):
        attention_cell = self._cells[0]
        attention_state = state[0]
        cur_inp, new_attention_state = attention_cell(inputs, attention_state)
        new_states.append(new_attention_state)

      for i in range(1, len(self._cells)):
        with tf.variable_scope("cell_%d" % i):

          cell = self._cells[i]
          cur_state = state[i]

          if self.use_new_attention:
            cur_inp = tf.concat([cur_inp, new_attention_state.attention], -1)
          else:
            cur_inp = tf.concat([cur_inp, attention_state.attention], -1)

          cur_inp, new_state = cell(cur_inp, cur_state)
          new_states.append(new_state)

    return cur_inp, tuple(new_states)
Example #11
    def wrapped_body(loop_counter, *args):
      """Loop body augmented with counter update.

      Args:
        loop_counter: Loop counter which needs to be incremented in the body.
        *args: List of args
          args[:len_orig_loop_vars] - Args for the original loop body.
          args[len_orig_loop_vars:] - External captures of cond. These get
            passed through as is.

      Returns:
        A list of tensors the same length as args.
      """
      # Convert the flow variables in `args` to TensorArrays. `args` should
      # already have the same structure as `orig_loop_vars` but currently there
      # is no nest.zip so we call `_pack_sequence_as` which flattens both
      # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
      # and packs it into the structure of `orig_loop_vars`.
      outputs = body(
          *_pack_sequence_as(orig_loop_vars, args[:len_orig_loop_vars]))
      if not nest.is_sequence(outputs):
        outputs = [outputs]
      # Compare the structure of input and output of body converting the
      # top-level tuples to list to be compatible with legacy while_loop.
      nest.assert_same_structure(list(outputs), list(orig_loop_vars))

      outputs = _tensor_array_to_flow(outputs)

      # Return the external_captures of cond_graph as is, i.e., treat them as
      # loop invariants.
      # TODO(srbs): Update lowering code to create _Enter nodes with
      # is_constant=True for inputs that are directly passed to outputs.
      return [loop_counter + 1] + list(outputs) + list(
          args[len_orig_loop_vars:])
Example #12
def map_structure_with_atomic(is_atomic_fn, map_fn, nested):
  """Maps the atomic elements of a nested structure.

  Arguments:
    is_atomic_fn: A function that determines if an element of `nested` is
      atomic.
    map_fn: The function to apply to atomic elements of `nested`.
    nested: A nested structure.

  Returns:
    The nested structure, with atomic elements mapped according to `map_fn`.

  Raises:
    ValueError: If an element that is neither atomic nor a sequence is
      encountered.
  """
  if is_atomic_fn(nested):
    return map_fn(nested)

  # Recursively convert.
  if not nest.is_sequence(nested):
    raise ValueError(
        'Received non-atomic and non-sequence element: {}'.format(nested))
  if nest._is_mapping(nested):
    values = [nested[k] for k in nest._sorted(nested)]
  else:
    values = nested
  mapped_values = [
      map_structure_with_atomic(is_atomic_fn, map_fn, ele) for ele in values
  ]
  return nest._sequence_like(nested, mapped_values)
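
A small sketch of the recursion, assuming the private `nest._is_mapping`, `nest._sorted`, and `nest._sequence_like` helpers behave as in the TF versions that ship them: treat every int as atomic and double it, recursing through lists and dicts.

nested = {"a": 1, "b": [2, 3, {"c": 4}]}
doubled = map_structure_with_atomic(
    lambda x: isinstance(x, int),  # is_atomic_fn: ints are the leaves
    lambda x: 2 * x,               # map_fn: applied to every leaf
    nested)
# doubled == {"a": 2, "b": [4, 6, {"c": 8}]}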
Example #13
 def __call__(self, inputs, state, scope=None):
   """Run this multi-layer cell on inputs, starting from state."""
   with vs.variable_scope(scope or type(self).__name__):  # "MultiRNNCell"
     cur_state_pos = 0
     cur_inp = inputs
     new_states = []
     for i, cell in enumerate(self._cells):
       with vs.variable_scope("Cell%d" % i):
         if self._state_is_tuple:
           if not nest.is_sequence(state):
             raise ValueError(
                 "Expected state to be a tuple of length %d, but received: %s"
                 % (len(self.state_size), state))
           cur_state = state[i]
          else:
            # Non-tuple state: the layers' states were stacked along axis 0,
            # so unpack and take this layer's slice.
            cur_state = array_ops.unpack(state)[i]
         cur_inp, new_state = cell(cur_inp, cur_state)
         new_states.append(new_state)
   """
   new_states = (tuple(new_states) if self._state_is_tuple
                 else array_ops.concat(1, new_states))
   """
   new_states = array_ops.pack(new_states)
   return cur_inp, new_states
Example #14
 def __call__(self, inputs, state, scope=None):
   """Run this multi-layer cell on inputs, starting from state."""
   with vs.variable_scope(scope or "multi_rnn_cell"):
     cur_state_pos = 0
     cur_inp = inputs
     new_states = []
     outputs = []
     for i, cell in enumerate(self._cells):
       with vs.variable_scope("cell_%d" % i):
         if self._state_is_tuple:
           if not nest.is_sequence(state):
             raise ValueError(
                 "Expected state to be a tuple of length %d, but received: %s"
                 % (len(self.state_size), state))
           cur_state = state[i]
         else:
           cur_state = array_ops.slice(
               state, [0, cur_state_pos], [-1, cell.state_size])
           cur_state_pos += cell.state_size
         cur_inp, new_state = cell(cur_inp, cur_state)
         outputs.append(cur_inp)
         new_states.append(new_state)
   new_states = (tuple(new_states) if self._state_is_tuple else
                 array_ops.concat_v2(new_states, 1))
   return tuple(outputs), new_states
Example #15
    def _create(self, encoder_output, decoder_state_size, **kwargs):
        """ Creates decoder's initial RNN states according to
        `decoder_state_size`.

        Passes the final state of encoder to each layer in decoder.
        Args:
            encoder_output: An instance of `collections.namedtuple`
              from `Encoder.encode()`.
            decoder_state_size: RNN decoder state size.
            **kwargs:

        Returns: The decoder states with the structure determined
          by `decoder_state_size`.

        Raises:
            ValueError: if the structure of encoder RNN state does not
              have the same structure of decoder RNN state.
        """
        batch_size = tf.shape(encoder_output.attention_length)[0]
        # of type LSTMStateTuple
        enc_final_state = _final_state(
            encoder_output.final_states, direction=self.params["direction"])
        assert_state_is_compatible(rnn_cell_impl._zero_state_tensors(
            decoder_state_size[0],
            batch_size, tf.float32), enc_final_state)
        if nest.is_sequence(decoder_state_size):
            return tuple([enc_final_state for _ in decoder_state_size])
        return enc_final_state
Example #16
def _infer_state_dtype(explicit_dtype, state):
  """Infer the dtype of an RNN state.

  Args:
    explicit_dtype: explicitly declared dtype or None.
    state: RNN's hidden state. Must be a Tensor or a nested iterable containing
      Tensors.

  Returns:
    dtype: inferred dtype of hidden state.

  Raises:
    ValueError: if `state` has heterogeneous dtypes or is empty.
  """
  if explicit_dtype is not None:
    return explicit_dtype
  elif nest.is_sequence(state):
    inferred_dtypes = [element.dtype for element in nest.flatten(state)]
    if not inferred_dtypes:
      raise ValueError("Unable to infer dtype from empty state.")
    all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes])
    if not all_same:
      raise ValueError(
          "State has tensors of different inferred_dtypes. Unable to infer a "
          "single representative dtype.")
    return inferred_dtypes[0]
  else:
    return state.dtype
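
A usage sketch (TF 1.x assumed; both state tensors share a dtype):

c = tf.zeros([4, 32], dtype=tf.float32)
h = tf.zeros([4, 32], dtype=tf.float32)
_infer_state_dtype(None, (c, h))        # -> tf.float32, inferred from state
_infer_state_dtype(tf.float64, (c, h))  # -> tf.float64, explicit dtype wins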
Example #17
  def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s).

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      If `state_size` is an int or TensorShape, then the return value is an
      `N-D` tensor of shape `[batch_size x state_size]` filled with zeros.

      If `state_size` is a nested list or tuple, then the return value is
      a nested list or tuple (of the same structure) of `2-D` tensors with
      the shapes `[batch_size x s]` for each s in `state_size`.
    """
    state_size = self.state_size
    if nest.is_sequence(state_size):
      state_size_flat = nest.flatten(state_size)
      zeros_flat = [
          array_ops.zeros(
              array_ops.pack(_state_size_with_prefix(s, prefix=[batch_size])),
              dtype=dtype)
          for s in state_size_flat]
      for s, z in zip(state_size_flat, zeros_flat):
        z.set_shape(_state_size_with_prefix(s, prefix=[None]))
      zeros = nest.pack_sequence_as(structure=state_size,
                                    flat_sequence=zeros_flat)
    else:
      zeros_size = _state_size_with_prefix(state_size, prefix=[batch_size])
      zeros = array_ops.zeros(array_ops.pack(zeros_size), dtype=dtype)
      zeros.set_shape(_state_size_with_prefix(state_size, prefix=[None]))

    return zeros
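
A call sketch, assuming a TF 1.x cell that inherits this method and whose state_size is a nested tuple (e.g. an LSTM cell):

cell = tf.nn.rnn_cell.BasicLSTMCell(64)
init_state = cell.zero_state(batch_size=32, dtype=tf.float32)
# init_state.c and init_state.h are both [32, 64] zero tensors, packed
# with the same structure as cell.state_size.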
Example #18
    @classmethod
    def _tile_along_beam(cls, beam_size, state):
        if nest.is_sequence(state):
            return nest_map(
                lambda val: cls._tile_along_beam(beam_size, val),
                state
            )

        if not isinstance(state, tf.Tensor):
            raise ValueError("State should be a sequence or tensor")

        tensor = state

        tensor_shape = tensor.get_shape().with_rank_at_least(1)

        try:
            new_first_dim = tensor_shape[0] * beam_size
        except (TypeError, ValueError):
            new_first_dim = None

        dynamic_tensor_shape = tf.unpack(tf.shape(tensor))
        res = tf.expand_dims(tensor, 1)
        res = tf.tile(res, [1, beam_size] + [1] * (tensor_shape.ndims-1))
        res = tf.reshape(res, [-1] + list(dynamic_tensor_shape[1:]))
        res.set_shape([new_first_dim] + list(tensor_shape[1:]))
        return res
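
A shape-level sketch of the tiling; `BeamDecoder` here is a hypothetical stand-in for the unshown class that owns this classmethod:

state = tf.constant([[1.0, 2.0], [3.0, 4.0]])   # shape [2, 2]
tiled = BeamDecoder._tile_along_beam(3, state)  # shape [6, 2]
# Row order is e0, e0, e0, e1, e1, e1: each batch entry is repeated
# beam_size times, and nested states are tiled leaf by leaf via nest_map.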
Example #19
 def attention(query):
   """Put attention masks on hidden using hidden_features and query."""
   ds = []  # Results of attention reads will be stored here.
   if nest.is_sequence(query):  # If the query is a tuple, flatten it.
     query_list = nest.flatten(query)
     for q in query_list:  # Check that ndims == 2 if specified.
       ndims = q.get_shape().ndims
       if ndims:
         assert ndims == 2
     query = array_ops.concat(1, query_list)
   for i in xrange(num_heads):
     with variable_scope.variable_scope("Attention_%d" % i):                  
       y = linear(query, attention_vec_size, True)
       y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
       # Attention mask is a softmax of v^T * tanh(...).
       s = math_ops.reduce_sum(
           v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
       # multiply with source mask, then do softmax
       if src_mask is not None:
         s = s * src_mask
       a = nn_ops.softmax(s)
       # Now calculate the attention-weighted vector d.
       d = math_ops.reduce_sum(
           array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
           [1, 2])                  
       ds.append(array_ops.reshape(d, [-1, attn_size]))
   return ds            
Example #20
    def wrapped_body(loop_counter, *args):
      """Loop body augmented with counter update.

      Args:
        loop_counter: Loop counter which needs to be incremented in the body.
        *args: List of args

      Returns:
        A list of tensors the same length as args.
      """
      # Capture the tensors already captured in cond_graph so that they appear
      # in the same order in body_graph.external_captures.
      for t in cond_graph.external_captures:
        ops.get_default_graph().capture(t)

      # Convert the flow variables in `args` to TensorArrays. `args` should
      # already have the same structure as `orig_loop_vars` but currently there
      # is no nest.zip so we call `_pack_sequence_as` which flattens both
      # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
      # and packs it into the structure of `orig_loop_vars`.
      outputs = body(*_pack_sequence_as(orig_loop_vars, args))
      if not nest.is_sequence(outputs):
        outputs = [outputs]
      # Compare the structure of input and output of body converting the
      # top-level tuples to list to be compatible with legacy while_loop.
      nest.assert_same_structure(list(outputs), list(orig_loop_vars))

      outputs = _tensor_array_to_flow(outputs)

      # TODO(srbs): Update lowering code to create _Enter nodes with
      # is_constant=True for inputs that are directly passed to outputs.
      return [loop_counter + 1] + list(outputs)
Example #21
 def attention(self, state):
     """Put attention masks on hidden using hidden_features and query."""
     ds = []  # Results of attention reads will be stored here.
     if nest.is_sequence(state):  # If the query is a tuple, flatten it.
         # query_list = nest.flatten(state)
         # for q in query_list:  # Check that ndims == 2 if specified.
         #   ndims = q.get_shape().ndims
         #   if ndims:
         #     assert ndims == 2
         # state = tf.concat(1, query_list)
         state = state[1]
     for a in xrange(self.num_heads):
         with tf.variable_scope("Attention_%d" % a, reuse=self.reuse_variables):
             y = tf.reshape(state, [-1, 1, 1, self.attn_vec_dim])
             # Attention mask is a softmax of v^T * tanh(...).
             # s = tf.reduce_sum(
             #     v[a] * tf.tanh(hidden_features[a] + y), [2, 3])
             # s = tf.reduce_sum(
             #     self.v[a] * tf.mul(self.hidden_features[a], y), [2, 3])
             s = tf.reduce_sum(tf.mul(self.hidden_features[a], y), [2, 3])
             s = s - (1 - self.encoder_attn_masks) * 1e12
             attn_mask = tf.nn.softmax(s)
             # Now calculate the attention-weighted vector d.
             d = tf.reduce_sum(tf.reshape(attn_mask, [-1, self.attn_length, 1, 1]) * self.hidden_features[a], [1, 2])
             ds.append(tf.reshape(d, [-1, self.attn_dim]))
     attns = tf.concat(1, ds)
     attns.set_shape([None, self.num_heads * self.attn_dim])
     self.attention_vars = True
     return attns, attn_mask
Example #22
  def match(self, expected, actual):
    """Matches nested structures.

    Recursively matches shape and values of `expected` and `actual`.
    Handles scalars, numpy arrays and other python sequence containers
    e.g. list, dict.

    Args:
      expected: Nested structure 1.
      actual: Nested structure 2.

    Raises:
      AssertionError if matching fails.
    """
    if isinstance(expected, np.ndarray):
      expected = expected.tolist()
    if isinstance(actual, np.ndarray):
      actual = actual.tolist()
    self.assertEqual(type(expected), type(actual))

    if nest.is_sequence(expected):
      self.assertEqual(len(expected), len(actual))
      if isinstance(expected, dict):
        for key1, key2 in zip(sorted(expected), sorted(actual)):
          self.assertEqual(key1, key2)
          self.match(expected[key1], actual[key2])
      else:
        for item1, item2 in zip(expected, actual):
          self.match(item1, item2)
    else:
      self.assertEqual(expected, actual)
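
Inside the test case that defines it, `match` can be exercised like this (the structures are illustrative):

self.match({"a": [1, 2], "b": np.array([3.0])},
           {"a": [1, 2], "b": np.array([3.0])})  # passes
self.match([1, 2], (1, 2))  # raises AssertionError: list vs. tuple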
Example #23
 def testIsSequence(self):
   self.assertFalse(nest.is_sequence("1234"))
   self.assertTrue(nest.is_sequence([1, 3, [4, 5]]))
   self.assertTrue(nest.is_sequence(((7, 8), (5, 6))))
   self.assertTrue(nest.is_sequence([]))
   self.assertFalse(nest.is_sequence(set([1, 2])))
   ones = tf.ones([2, 3])
   self.assertFalse(nest.is_sequence(ones))
   self.assertFalse(nest.is_sequence(tf.tanh(ones)))
   self.assertFalse(nest.is_sequence(np.ones((4, 5))))
Example #24
  def __init__(self,
               args,
               output_size,
               build_bias,
               bias_initializer=None,
               kernel_initializer=None):
    self._build_bias = build_bias

    if args is None or (nest.is_sequence(args) and not args):
      raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
      args = [args]
      self._is_sequence = False
    else:
      self._is_sequence = True

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape() for a in args]
    for shape in shapes:
      if shape.ndims != 2:
        raise ValueError("linear is expecting 2D arguments: %s" % shapes)
      if shape[1].value is None:
        raise ValueError("linear expects shape[1] to be provided for shape %s, "
                         "but saw %s" % (shape, shape[1]))
      else:
        total_arg_size += shape[1].value

    dtype = [a.dtype for a in args][0]

    scope = vs.get_variable_scope()
    with vs.variable_scope(scope) as outer_scope:
      self._weights = vs.get_variable(
          _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size],
          dtype=dtype,
          initializer=kernel_initializer)
      if build_bias:
        with vs.variable_scope(outer_scope) as inner_scope:
          inner_scope.set_partitioner(None)
          if bias_initializer is None:
            bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype)
          self._biases = vs.get_variable(
              _BIAS_VARIABLE_NAME, [output_size],
              dtype=dtype,
              initializer=bias_initializer)
Example #25
def _check_default_value(shape, default_value, dtype, key):
  """Returns default value as tuple if it's valid, otherwise raises errors.

  This function verifies that `default_value` is compatible with both `shape`
  and `dtype`. If it is not compatible, it raises an error. If it is compatible,
  it casts default_value to a tuple and returns it. `key` is used only
  for error message.

  Args:
    shape: An iterable of integers specifies the shape of the `Tensor`.
    default_value: If a single value is provided, the same value will be applied
      as the default value for every item. If an iterable of values is
      provided, the shape of the `default_value` should be equal to the given
      `shape`.
    dtype: defines the type of values. Default value is `tf.float32`. Must be a
      non-quantized, real integer or floating point type.
    key: A string providing key to look up corresponding `Tensor`.

  Returns:
    A tuple which will be used as default value.

  Raises:
    TypeError: if `default_value` is an iterable but not compatible with `shape`
    TypeError: if `default_value` is not compatible with `dtype`.
    ValueError: if `dtype` is not convertible to `tf.float32`.
  """
  if default_value is None:
    return None

  if isinstance(default_value, int):
    return _create_tuple(shape, default_value)

  if isinstance(default_value, float) and dtype.is_floating:
    return _create_tuple(shape, default_value)

  if callable(getattr(default_value, 'tolist', None)):  # Handles numpy arrays
    default_value = default_value.tolist()

  if nest.is_sequence(default_value):
    if not _is_shape_and_default_value_compatible(default_value, shape):
      raise ValueError(
          'The shape of default_value must be equal to given shape. '
          'default_value: {}, shape: {}, key: {}'.format(
              default_value, shape, key))
    # Check if the values in the list are all integers or are convertible to
    # floats.
    is_list_all_int = all(
        isinstance(v, int) for v in nest.flatten(default_value))
    is_list_has_float = any(
        isinstance(v, float) for v in nest.flatten(default_value))
    if is_list_all_int:
      return _as_tuple(default_value)
    if is_list_has_float and dtype.is_floating:
      return _as_tuple(default_value)
  raise TypeError('default_value must be compatible with dtype. '
                  'default_value: {}, dtype: {}, key: {}'.format(
                      default_value, dtype, key))
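
Expected behavior, sketched under the assumption that the private `_create_tuple` and `_as_tuple` helpers expand scalars and cast lists as in tf.feature_column:

_check_default_value((2,), 1, tf.int64, "col")       # -> (1, 1)
_check_default_value((2,), 0.5, tf.float32, "col")   # -> (0.5, 0.5)
_check_default_value((2,), [1, 2], tf.int64, "col")  # -> (1, 2)
_check_default_value((2,), 0.5, tf.int64, "col")     # raises TypeError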
Example #26
 def wrap_state(self, state):
     dummy = BeamDecoderCellWrapper(None, self.num_classes, self.max_len, self.stop_token, self.beam_size)
     if nest.is_sequence(state):
         batch_size = tf.shape(nest.flatten(state)[0])[0]
         dtype = nest.flatten(state)[0].dtype
     else:
         batch_size = tf.shape(state)[0]
         dtype = state.dtype
     return dummy._create_state(batch_size, dtype, cell_state=state)
Example #27
  def _fwlinear(self, args, output_size, scope=None):
    if args is None or (nest.is_sequence(args) and not args):
      raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
      args = [args]
    assert len(args) == 2
    assert args[0].get_shape().as_list()[1] == output_size

    dtype = [a.dtype for a in args][0]

    with vs.variable_scope(scope or "Linear"):
      # matrixW = vs.get_variable(
      #   "MatrixW", dtype=dtype, initializer=tf.convert_to_tensor(np.eye(output_size, dtype=np.float32) * .05))
      matrixW = vs.get_variable("MatrixW", [output_size, output_size], dtype=dtype)
      matrixC = vs.get_variable(
        "MatrixC", [args[1].get_shape().as_list()[1], output_size], dtype=dtype)

      res = tf.matmul(args[0], matrixW) + tf.matmul(args[1], matrixC)
      return res
Example #28
 def __init__(self, cells, state_is_tuple=True):
   if not cells:
     raise ValueError("Must specify at least one cell for MultiRNNCell.")
   self._cells = cells
   self._state_is_tuple = state_is_tuple
   if not state_is_tuple:
     if any(nest.is_sequence(c.state_size) for c in self._cells):
       raise ValueError("Some cells return tuples of states, but the flag "
                        "state_is_tuple is not set.  State sizes are: %s"
                        % str([c.state_size for c in self._cells]))
Example #29
def linear(args, output_size, bias, bias_start=0.0, scope=None, squeeze=False, wd=0.0, input_keep_prob=1.0,
           is_train=None):
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    flat_args = [flatten(arg, 1) for arg in args]
    if input_keep_prob < 1.0:
        assert is_train is not None
        flat_args = [tf.cond(is_train, lambda: tf.nn.dropout(arg, input_keep_prob), lambda: arg)
                     for arg in flat_args]
    flat_out = _linear(flat_args, output_size, bias, bias_start=bias_start, scope=scope)
    out = reconstruct(flat_out, args[0], 1)
    if squeeze:
        out = tf.squeeze(out, [len(args[0].get_shape().as_list())-1])
    if wd:
        add_wd(wd)

    return out
Example #30
 def wrap_state(self, state, output_projection):
     dummy = BeamDecoderCellWrapper(None, output_projection, self.num_classes, self.max_len,
                                    self.start_token, self.stop_token,
                                    self.batch_size, self.beam_size,
                                    self.use_attention,
                                    self.alpha)
     if nest.is_sequence(state):
         dtype = nest.flatten(state)[0].dtype
     else:
         dtype = state.dtype
     return dummy._create_state(self.batch_size, dtype, cell_state=state)
Example #31
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):

  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs,
          encoder_state,
          cell,
          num_decoder_symbols,
          embedding_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          scope=scope)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_rnn_decoder(
            decoder_inputs, encoder_state, cell, num_decoder_symbols,
            embedding_size, output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
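
A hypothetical call, assuming the TF 1.x legacy-seq2seq-style imports used above; the sequence lengths and vocabulary sizes are illustrative only:

enc = [tf.placeholder(tf.int32, [None]) for _ in range(10)]
dec = [tf.placeholder(tf.int32, [None]) for _ in range(12)]
cell = rnn_cell.GRUCell(128)
outputs, state = embedding_rnn_seq2seq(
    enc, dec, cell,
    num_encoder_symbols=20000, num_decoder_symbols=20000,
    embedding_size=128, feed_previous=False)
# outputs is a list of 12 [None, 20000] logit tensors.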
Example #32
def prune_extra_keys(narrow, wide):
    """Recursively prunes keys from `wide` if they don't appear in `narrow`.

  Often used as preprocessing prior to calling `tf.nest.flatten`
  or `tf.nest.map_structure`.

  This function is more forgiving than the ones in `nest`; if two substructures'
  types or structures don't agree, we consider it invalid and `prune_extra_keys`
  will return the `wide` substructure as is.  Typically, additional checking is
  needed: you will also want to use
  `nest.assert_same_structure(narrow, prune_extra_keys(narrow, wide))`
  to ensure the result of pruning is still a correct structure.

  Examples:
  ```python
  wide = [{"a": "a", "b": "b"}]
  # Narrows 'wide'
  assert prune_extra_keys([{"a": 1}], wide) == [{"a": "a"}]
  # 'wide' lacks "c", is considered invalid.
  assert prune_extra_keys([{"c": 1}], wide) == wide
  # 'wide' contains a different type from 'narrow', is considered invalid
  assert prune_extra_keys("scalar", wide) == wide
  # 'wide' substructure for key "d" does not match the one in 'narrow' and
  # therefore is returned unmodified.
  assert (prune_extra_keys({"a": {"b": 1}, "d": None},
                           {"a": {"b": "b", "c": "c"}, "d": [1, 2]})
          == {"a": {"b": "b"}, "d": [1, 2]})
  ```

  Args:
    narrow: A nested structure.
    wide: A nested structure that may contain dicts with more fields than
      `narrow`.

  Returns:
    A structure with the same nested substructures as `wide`, but with
    dicts whose entries are limited to the keys found in the associated
    substructures of `narrow`.

    In case of substructure or size mismatches, the returned substructures
    will be returned as is.  Note that ObjectProxy-wrapped objects are
    considered equivalent to their non-ObjectProxy types.
  """
    if isinstance(wide, wrapt.ObjectProxy):
        return type(wide)(prune_extra_keys(narrow, wide.__wrapped__))

    narrow_raw = (narrow.__wrapped__
                  if isinstance(narrow, wrapt.ObjectProxy) else narrow)
    wide_raw = (wide.__wrapped__
                if isinstance(wide, wrapt.ObjectProxy) else wide)

    if ((type(narrow_raw) != type(wide_raw))  # pylint: disable=unidiomatic-typecheck
            and
            not (isinstance(narrow_raw, list) and isinstance(wide_raw, list))
            and not (isinstance(narrow_raw, collections_abc.Mapping)
                     and isinstance(wide_raw, collections_abc.Mapping))):
        # We return early if the types are different; but we make some exceptions:
        #  list subtypes are considered the same (e.g. ListWrapper and list())
        #  Mapping subtypes are considered the same (e.g. DictWrapper and dict())
        #  (TupleWrapper subtypes are handled by unwrapping ObjectProxy above)
        return wide

    if isinstance(narrow, collections_abc.Mapping):
        if len(narrow) > len(wide):
            # wide lacks a required key from narrow; return early.
            return wide

        narrow_keys = set(narrow.keys())
        wide_keys = set(wide.keys())
        if not wide_keys.issuperset(narrow_keys):
            # wide lacks a required key from narrow; return early.
            return wide
        ordered_items = [(k, prune_extra_keys(v, wide[k]))
                         for k, v in narrow.items()]
        if isinstance(wide, collections.defaultdict):
            subset = type(wide)(wide.default_factory, ordered_items)
        else:
            subset = type(wide)(ordered_items)
        return subset

    if nest.is_sequence(narrow):
        if _is_attrs(wide):
            items = [
                prune_extra_keys(n, w)
                for n, w in zip(_attr_items(narrow), _attr_items(wide))
            ]
            return type(wide)(*items)

        # Not an attrs, so can treat as lists or tuples from here on.
        if len(narrow) != len(wide):
            # wide's size is different than narrow; return early.
            return wide

        items = [prune_extra_keys(n, w) for n, w in zip(narrow, wide)]
        if _is_namedtuple(wide):
            return type(wide)(*items)
        # attrs classes were already handled above, so anything else here
        # is a plain list or tuple.
        return type(wide)(items)

    # narrow is a leaf, just return wide
    return wide
Example #33
def embedding_tied_rnn_seq2seq(encoder_inputs,
                               decoder_inputs,
                               cell,
                               num_symbols,
                               embedding_size,
                               num_decoder_symbols=None,
                               output_projection=None,
                               feed_previous=False,
                               dtype=None,
                               scope=None):
  """Embedding RNN sequence-to-sequence model with tied (shared) parameters.
  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_symbols x input_size]). Then it runs an RNN to encode embedded
  encoder_inputs into a state vector. Next, it embeds decoder_inputs using
  the same embedding. Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs. The decoder output is over
  symbols from 0 to num_decoder_symbols - 1 if num_decoder_symbols is
  provided; otherwise it is over 0 to num_symbols - 1.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_symbols: Integer; number of symbols for both encoder and decoder.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_decoder_symbols: Integer; number of output symbols for decoder. If
      provided, the decoder output is over symbols 0 to num_decoder_symbols - 1.
      Otherwise, decoder output is over symbols 0 to num_symbols - 1. Note that
      this assumes that the vocabulary is set up such that the first
      num_decoder_symbols of num_symbols are part of decoding.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_symbols] and B has
      shape [num_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype to use for the initial RNN states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_tied_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x output_symbols] containing the generated
        outputs where output_symbols = num_decoder_symbols if
        num_decoder_symbols is not None otherwise output_symbols = num_symbols.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: When output_projection has the wrong shape.
  """
  with variable_scope.variable_scope(
      scope or "embedding_tied_rnn_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype

    if output_projection is not None:
      proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
      proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
      proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
      proj_biases.get_shape().assert_is_compatible_with([num_symbols])

    embedding = variable_scope.get_variable(
        "embedding", [num_symbols, embedding_size], dtype=dtype)

    emb_encoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in encoder_inputs]
    emb_decoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in decoder_inputs]

    output_symbols = num_symbols
    if num_decoder_symbols is not None:
      output_symbols = num_decoder_symbols
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, output_symbols)

    if isinstance(feed_previous, bool):
      loop_function = _extract_argmax_and_embed(
          embedding, output_projection, True) if feed_previous else None
      return tied_rnn_seq2seq(emb_encoder_inputs, emb_decoder_inputs, cell,
                              loop_function=loop_function, dtype=dtype)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      loop_function = _extract_argmax_and_embed(
        embedding, output_projection, False) if feed_previous_bool else None
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                         reuse=reuse):
        outputs, state = tied_rnn_seq2seq(
            emb_encoder_inputs, emb_decoder_inputs, cell,
            loop_function=loop_function, dtype=dtype)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    # Calculate zero-state to know it's structure.
    static_batch_size = encoder_inputs[0].get_shape()[0]
    for inp in encoder_inputs[1:]:
      static_batch_size.merge_with(inp.get_shape()[0])
    batch_size = static_batch_size.value
    if batch_size is None:
      batch_size = array_ops.shape(encoder_inputs[0])[0]
    zero_state = cell.zero_state(batch_size, dtype)
    if nest.is_sequence(zero_state):
      state = nest.pack_sequence_as(structure=zero_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
Example #34
def nest_map(func, nested):
    if not nest.is_sequence(nested):
        return func(nested)
    flat = nest.flatten(nested)
    return nest.pack_sequence_as(nested, list(map(func, flat)))
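
`nest_map` applies `func` to every leaf of a nested structure and repacks the result; a quick sketch (TF 1.x tensors assumed):

state = (tf.zeros([2, 3]), {"h": tf.ones([2, 3])})
doubled = nest_map(lambda t: 2.0 * t, state)
# `doubled` keeps the same (tuple, dict) nesting as `state`.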
Example #35
        def attention(query):
            """
            Put attention masks on hidden using hidden_features and query.
            :param query: Vector to compute attention with
            """
            # Results of attention reads will be stored here.
            ds = []
            # Will store masks over encoder context
            attn_masks = []
            # Store attention logits
            attn_logits = []
            # If the query is a tuple, flatten it.
            if nest.is_sequence(query):
                query_list = nest.flatten(query)
                # Check that ndims == 2 if specified.
                for q in query_list:
                    ndims = q.get_shape().ndims
                    if ndims:
                        assert ndims == 2
                query = array_ops.concat(1, query_list)
            for a in xrange(num_heads):
                with variable_scope.variable_scope("Attention_%d" % a):
                    if attn_type == "linear":
                        y = linear(query, attention_vec_size, True)
                        y = array_ops.reshape(y,
                                              [-1, 1, 1, attention_vec_size])
                        # Attention mask is a softmax of v^T * tanh(...).
                        s = math_ops.reduce_sum(
                            v[a] * math_ops.tanh(hidden_features[a] + y),
                            [2, 3])
                    elif attn_type == "bilinear":
                        query = tf.tile(tf.expand_dims(query, 1),
                                        [1, attn_length, 1])
                        query = batch_linear(query, attn_size, bias=True)
                        hid = tf.squeeze(hidden, [2])
                        s = tf.reduce_sum(tf.mul(query, hid), [2])
                    else:
                        # Two layer MLP
                        y = linear(query, attention_vec_size, True)
                        y = array_ops.reshape(y,
                                              [-1, 1, 1, attention_vec_size])
                        # Attention mask is a softmax of v^T * tanh(...).
                        layer1 = math_ops.tanh(hidden_features[a] + y)
                        k2 = variable_scope.get_variable(
                            "AttnW_%d" % a,
                            [1, 1, attn_size, attention_vec_size])
                        layer2 = nn_ops.conv2d(layer1, k2, [1, 1, 1, 1],
                                               "SAME")
                        s = math_ops.reduce_sum(v[a] * math_ops.tanh(layer2),
                                                [2, 3])

                    a = nn_ops.softmax(s)
                    attn_masks.append(a)
                    attn_logits.append(s)
                    # Now calculate the attention-weighted vector d. Hidden is encoder
                    # hidden states
                    d = math_ops.reduce_sum(
                        array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                        [1, 2])
                    ds.append(array_ops.reshape(d, [-1, attn_size]))
            return ds, attn_masks, attn_logits
Example #36
    def __init__(self,
                 input_tensor_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 weight_decay_params=None,
                 kernel_initializer=None,
                 batch_squash=True,
                 dtype=tf.float32,
                 name='EncodingNetwork',
                 conv_type=CONV_TYPE_2D):
        """Creates an instance of `EncodingNetwork`.

    Network supports calls with shape outer_rank + input_tensor_spec.shape. Note
    outer_rank must be at least 1.

    For example, an input tensor spec with shape `(2, 3)` will require
    inputs that have at least a batch dimension, i.e. an input shape of
    `(?, 2, 3)`.

    Input preprocessing is possible via `preprocessing_layers` and
    `preprocessing_combiner` Layers.  If the `preprocessing_layers` nest is
    shallower than `input_tensor_spec`, then the layers will get the subnests.
    For example, if:

    ```python
    input_tensor_spec = ([TensorSpec(3)] * 2, [TensorSpec(3)] * 5)
    preprocessing_layers = (Layer1(), Layer2())
    ```

    then preprocessing will call:

    ```python
    preprocessed = [preprocessing_layers[0](observations[0]),
                    preprocessing_layers[1](observations[1])]
    ```

    However if

    ```python
    preprocessing_layers = ([Layer1() for _ in range(2)],
                            [Layer2() for _ in range(5)])
    ```

    then preprocessing will call:
    ```python
    preprocessed = [
      layer(obs) for layer, obs in zip(flatten(preprocessing_layers),
                                       flatten(observations))
    ]
    ```

    **NOTE** `preprocessing_layers` and `preprocessing_combiner` are not allowed
    to have already been built.  This ensures calls to `network.copy()` in the
    future always have an unbuilt, fresh set of parameters.  Furthermore,
    a shallow copy of the layers is always created by the Network, so the
    layer objects passed to the network are never modified.  For more details
    of the semantics of `copy`, see the docstring of
    `tf_agents.networks.Network.copy`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input observations.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations. All of these
        layers must not be already built.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them.  Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`. This
        layer must not be already built.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is either a length-three tuple indicating
        `(filters, kernel_size, stride)` or a length-four tuple indicating
        `(filters, kernel_size, stride, dilation_rate)`.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each item
        is the fraction of input units to drop or a dictionary of parameters
        according to the keras.Dropout documentation. The additional parameter
        `permanent`, if set to True, allows applying dropout at inference time
        for approximate Bayesian inference. The dropout layers are interleaved
        with the fully connected layers; there is a dropout layer after each
        fully connected layer, except if the entry in the list is None. This
        list must have the same length as fc_layer_params, or be None.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      weight_decay_params: Optional list of weight decay parameters for the
        fully connected layers.
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers. If none is provided, a default
        variance_scaling_initializer is used.
      batch_squash: If True, the outer_ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used
        with observations of shape [BxTx...].
      dtype: The dtype to use by the convolution and fully connected layers.
      name: A string representing name of the network.
      conv_type: string, '1d' or '2d'. Convolution layers will be 1D or 2D,
        respectively.

    Raises:
      ValueError: If any of `preprocessing_layers` is already built.
      ValueError: If `preprocessing_combiner` is already built.
      ValueError: If the number of dropout layer parameters does not match the
        number of fully connected layer parameters.
      ValueError: If conv_layer_params tuples do not have 3 or 4 elements each.
    """
        if preprocessing_layers is None:
            flat_preprocessing_layers = None
        else:
            flat_preprocessing_layers = [
                _copy_layer(layer)
                for layer in tf.nest.flatten(preprocessing_layers)
            ]
            # Assert shallow structure is the same. This verifies preprocessing
            # layers can be applied on expected input nests.
            input_nest = input_tensor_spec
            # Given the flatten on preprocessing_layers above we need to make sure
            # input_tensor_spec is a sequence for the shallow_structure check below
            # to work.
            if not nest.is_sequence(input_tensor_spec):
                input_nest = [input_tensor_spec]
            nest.assert_shallow_structure(preprocessing_layers,
                                          input_nest,
                                          check_types=False)

        if (len(tf.nest.flatten(input_tensor_spec)) > 1
                and preprocessing_combiner is None):
            raise ValueError(
                'preprocessing_combiner layer is required when more than 1 '
                'input_tensor_spec is provided.')

        if preprocessing_combiner is not None:
            preprocessing_combiner = _copy_layer(preprocessing_combiner)

        if not kernel_initializer:
            kernel_initializer = tf.compat.v1.variance_scaling_initializer(
                scale=2.0, mode='fan_in', distribution='truncated_normal')

        layers = []

        if conv_layer_params:
            if conv_type == '2d':
                conv_layer_type = tf.keras.layers.Conv2D
            elif conv_type == '1d':
                conv_layer_type = tf.keras.layers.Conv1D
            else:
                raise ValueError('unsupported conv type of %s. Use 1d or 2d' %
                                 (conv_type))

            for config in conv_layer_params:
                if len(config) == 4:
                    (filters, kernel_size, strides, dilation_rate) = config
                elif len(config) == 3:
                    (filters, kernel_size, strides) = config
                    dilation_rate = (1, 1) if conv_type == '2d' else (1, )
                else:
                    raise ValueError(
                        'only 3 or 4 elements permitted in conv_layer_params tuples'
                    )
                layers.append(
                    conv_layer_type(filters=filters,
                                    kernel_size=kernel_size,
                                    strides=strides,
                                    dilation_rate=dilation_rate,
                                    activation=activation_fn,
                                    kernel_initializer=kernel_initializer,
                                    dtype=dtype))

        layers.append(tf.keras.layers.Flatten())

        if fc_layer_params:
            if dropout_layer_params is None:
                dropout_layer_params = [None] * len(fc_layer_params)
            else:
                if len(dropout_layer_params) != len(fc_layer_params):
                    raise ValueError(
                        'Dropout and fully connected layer parameter lists '
                        'have different lengths (%d vs. %d).' %
                        (len(dropout_layer_params), len(fc_layer_params)))
            if weight_decay_params is None:
                weight_decay_params = [None] * len(fc_layer_params)
            else:
                if len(weight_decay_params) != len(fc_layer_params):
                    raise ValueError(
                        'Weight decay and fully connected layer parameter '
                        'lists have different lengths (%d vs. %d).' %
                        (len(weight_decay_params), len(fc_layer_params)))

            for num_units, dropout_params, weight_decay in zip(
                    fc_layer_params, dropout_layer_params,
                    weight_decay_params):
                kernel_regularizer = None
                if weight_decay is not None:
                    kernel_regularizer = tf.keras.regularizers.l2(weight_decay)
                layers.append(
                    tf.keras.layers.Dense(
                        num_units,
                        activation=activation_fn,
                        kernel_initializer=kernel_initializer,
                        kernel_regularizer=kernel_regularizer,
                        dtype=dtype))
                if not isinstance(dropout_params, dict):
                    dropout_params = {
                        'rate': dropout_params
                    } if dropout_params else None

                if dropout_params is not None:
                    layers.append(
                        utils.maybe_permanent_dropout(**dropout_params))

        super(EncodingNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        # Pull out the nest structure of the preprocessing layers. This avoids
        # saving the original kwarg layers as a class attribute which Keras would
        # then track.
        self._preprocessing_nest = tf.nest.map_structure(
            lambda l: None, preprocessing_layers)
        self._flat_preprocessing_layers = flat_preprocessing_layers
        self._preprocessing_combiner = preprocessing_combiner
        self._postprocessing_layers = layers
        self._batch_squash = batch_squash
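For orientation, here is a minimal usage sketch of this constructor. The observation specs, layer choices, and sizes below are illustrative assumptions, not part of the original example.

```python
import tensorflow as tf
from tf_agents.networks import encoding_network
from tf_agents.specs import tensor_spec

# Illustrative dict observation with two entries.
observation_spec = {
    'image': tensor_spec.TensorSpec([28, 28, 1], tf.float32),
    'state': tensor_spec.TensorSpec([8], tf.float32),
}
# One fresh (unbuilt) preprocessing layer per observation entry.
preprocessing_layers = {
    'image': tf.keras.layers.Flatten(),
    'state': tf.keras.layers.Dense(16),
}
net = encoding_network.EncodingNetwork(
    observation_spec,
    preprocessing_layers=preprocessing_layers,
    preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
    fc_layer_params=(64, 32))
```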
Example #37
def _linear(args,
            output_size,
            bias,
            bias_initializer=None,
            kernel_initializer=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    Args:
      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
      output_size: int, second dimension of W[i].
      bias: boolean, whether to add a bias term or not.
      bias_initializer: starting value to initialize the bias
        (default is all zeros).
      kernel_initializer: starting value to initialize the weight.

    Returns:
      A 2D Tensor with shape [batch x output_size] equal to
      sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

    Raises:
      ValueError: if any of the arguments has an unspecified or wrong shape.
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape() for a in args]
    for shape in shapes:
        if shape.ndims != 2:
            raise ValueError("linear is expecting 2D arguments: %s" % shapes)
        if shape[1].value is None:
            raise ValueError(
                "linear expects shape[1] to be provided for shape %s, "
                "but saw %s" % (shape, shape[1]))
        else:
            total_arg_size += shape[1].value

    dtype = [a.dtype for a in args][0]

    # Now the computation.
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope) as outer_scope:
        weights = vs.get_variable(_WEIGHTS_VARIABLE_NAME,
                                  [total_arg_size, output_size],
                                  dtype=dtype,
                                  initializer=kernel_initializer)
        if len(args) == 1:
            res = math_ops.matmul(args[0], weights)
        else:
            res = math_ops.matmul(array_ops.concat(args, 1), weights)
        if not bias:
            return res
        with vs.variable_scope(outer_scope) as inner_scope:
            inner_scope.set_partitioner(None)
            if bias_initializer is None:
                bias_initializer = init_ops.constant_initializer(0.0,
                                                                 dtype=dtype)
            biases = vs.get_variable(_BIAS_VARIABLE_NAME, [output_size],
                                     dtype=dtype,
                                     initializer=bias_initializer)
        return nn_ops.bias_add(res, biases)
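A brief, hedged usage sketch of `_linear` (TF1 graph mode; the shapes and scope name are illustrative, and the surrounding module's imports are assumed):

```python
import tensorflow.compat.v1 as tf1
tf1.disable_eager_execution()

x = tf1.placeholder(tf1.float32, [None, 10])
h = tf1.placeholder(tf1.float32, [None, 20])
with tf1.variable_scope("rnn_cell"):
    # Creates one [30, 64] kernel over the concatenated inputs plus a bias,
    # i.e. y = concat([x, h], 1) @ W + b.
    y = _linear([x, h], output_size=64, bias=True)
```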
Example #38
def custom_bidirectional_rnn(cell_fw,
                             cell_bw,
                             inputs,
                             initial_state_fw=None,
                             initial_state_bw=None,
                             dtype=None,
                             sequence_length=None,
                             scope=None):
    """Creates a bidirectional recurrent neural network.
    Similar to the unidirectional case above (rnn) but takes input and builds
    independent forward and backward RNNs with the final forward and backward
    outputs depth-concatenated, such that the output will have the format
    [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of
    forward and backward cell must match. The initial state for both directions
    is zero by default (but can be set optionally). Unlike the standard
    implementation, this variant also returns the per-step forward and backward
    states. The network is fully unrolled for the given (passed in) length(s)
    of the sequence(s), or completely unrolled if length(s) is not given.
    Args:
        cell_fw: An instance of RNNCell, to be used for forward direction.
        cell_bw: An instance of RNNCell, to be used for backward direction.
        inputs: A length T list of inputs, each a tensor of shape
            [batch_size, input_size], or a nested tuple of such elements.
        initial_state_fw: (optional) An initial state for the forward RNN.
            This must be a tensor of appropriate type and shape
            `[batch_size, cell_fw.state_size]`.
            If `cell_fw.state_size` is a tuple, this should be a tuple of
            tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
        initial_state_bw: (optional) Same as for `initial_state_fw`, but using
            the corresponding properties of `cell_bw`.
        dtype: (optional) The data type for the initial state.  Required if
            either of the initial states are not provided.
        sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
            containing the actual lengths for each of the sequences.
        scope: VariableScope for the created subgraph; defaults to "BiRNN"
    Returns:
        A tuple (outputs, output_state_fw, output_state_bw, fw_states,
        bw_states) where:
            outputs is a length `T` list of outputs (one for each input), which
                are depth-concatenated forward and backward outputs.
            output_state_fw is the final state of the forward rnn.
            output_state_bw is the final state of the backward rnn.
            fw_states and bw_states are the per-time-step forward and backward
                RNN states.
    Raises:
        TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
        ValueError: If inputs is None or an empty list.
    """

    if not isinstance(cell_fw, tf.compat.v1.nn.rnn_cell.RNNCell):
        raise TypeError("cell_fw must be an instance of RNNCell")
    if not isinstance(cell_bw, tf.compat.v1.nn.rnn_cell.RNNCell):
        raise TypeError("cell_bw must be an instance of RNNCell")
    if not nest.is_sequence(inputs):
        raise TypeError("inputs must be a sequence")
    if not inputs:
        raise ValueError("inputs must not be empty")

    with vs.variable_scope(scope or "bidirectional_rnn"):
        # Forward direction
        with vs.variable_scope("fw") as fw_scope:
            output_fw, output_state_fw, fw_states = custom_rnn(
                cell_fw,
                inputs,
                initial_state_fw,
                dtype,
                sequence_length,
                scope=fw_scope)

        # Backward direction
        with vs.variable_scope("bw") as bw_scope:
            reversed_inputs = _reverse_seq(inputs, sequence_length)
            tmp, output_state_bw, tmp_states = custom_rnn(cell_bw,
                                                          reversed_inputs,
                                                          initial_state_bw,
                                                          dtype,
                                                          sequence_length,
                                                          scope=bw_scope)

    output_bw = _reverse_seq(tmp, sequence_length)
    bw_states = _reverse_seq(tmp_states, sequence_length)

    # Concat each of the forward/backward outputs
    flat_output_fw = nest.flatten(output_fw)
    flat_output_bw = nest.flatten(output_bw)

    flat_outputs = tuple(
        array_ops.concat(values=[fw, bw], axis=1)
        for fw, bw in zip(flat_output_fw, flat_output_bw))

    outputs = nest.pack_sequence_as(structure=output_fw,
                                    flat_sequence=flat_outputs)

    return (outputs, output_state_fw, output_state_bw, fw_states, bw_states)
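A hedged usage sketch (TF1 graph mode; assumes the `custom_rnn` and `_reverse_seq` helpers from the same module, with illustrative shapes):

```python
import tensorflow.compat.v1 as tf1

T, batch_size, input_size, num_units = 5, 32, 16, 64
inputs = [tf1.placeholder(tf1.float32, [batch_size, input_size])
          for _ in range(T)]
cell_fw = tf1.nn.rnn_cell.LSTMCell(num_units)
cell_bw = tf1.nn.rnn_cell.LSTMCell(num_units)
seq_len = tf1.placeholder(tf1.int32, [batch_size])

outputs, state_fw, state_bw, fw_states, bw_states = custom_bidirectional_rnn(
    cell_fw, cell_bw, inputs, dtype=tf1.float32, sequence_length=seq_len)
# Each element of `outputs` has shape [batch_size, 2 * num_units].
```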
Example #39
def _linear(args,
            output_size,
            bias,
            bias_start=0.0,
            weights_init=None,
            trainable=True,
            restore=True,
            reuse=False,
            scope=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    Arguments:
        args: a 2D Tensor or a list of 2D, batch x n, Tensors.
        output_size: int, second dimension of W[i].
        bias: boolean, whether to add a bias term or not.
        bias_start: starting value to initialize the bias; 0 by default.
        weights_init: initializer for the weight matrix.
        trainable: boolean, whether the created variables are trainable.
        restore: boolean, whether the variables should be restored from a
            checkpoint.
        reuse: boolean, whether to reuse variables within the given scope.
        scope: VariableScope for the created subgraph; defaults to "Linear".

    Returns:
        A 2D Tensor with shape [batch x output_size] equal to
        sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

    Raises:
        ValueError: if any of the arguments has an unspecified or wrong shape.
    """
    if args is None or (is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2D arguments: %s" %
                             str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" %
                             str(shapes))
        else:
            total_arg_size += shape[1]

    # Now the computation.
    with tf.variable_scope(scope or "Linear", reuse=reuse):
        matrix = va.variable("Matrix", [total_arg_size, output_size],
                             initializer=weights_init,
                             trainable=trainable,
                             restore=restore)
        if len(args) == 1:
            res = tf.matmul(args[0], matrix)
        else:
            res = tf.matmul(array_ops.concat(args, 1), matrix)
        if not bias:
            return res
        bias_term = va.variable(
            "Bias", [output_size],
            initializer=tf.constant_initializer(bias_start),
            trainable=trainable,
            restore=restore)
    return res + bias_term
Example #40
def static_rnn(cell, inputs, initial_state=None, dtype=None,
               sequence_length=None, scope=None, fg=None):
  """Creates a recurrent neural network specified by RNNCell `cell`.
  The simplest form of RNN network generated is:
  ```python
    state = cell.zero_state(...)
    outputs = []
    for input_ in inputs:
      output, state = cell(input_, state)
      outputs.append(output)
    return (outputs, state)
  ```
  However, a few other options are available:
  An initial state can be provided.
  If the sequence_length vector is provided, dynamic calculation is performed.
  This method of calculation does not compute the RNN steps past the maximum
  sequence length of the minibatch (thus saving computational time),
  and properly propagates the state at an example's sequence length
  to the final state output.
  The dynamic calculation performed is, at time `t` for batch row `b`,
  ```python
    (output, state)(b, t) =
      (t >= sequence_length(b))
        ? (zeros(cell.output_size), states(b, sequence_length(b) - 1))
        : cell(input(b, t), state(b, t - 1))
  ```
  Args:
    cell: An instance of RNNCell.
    inputs: A length T list of inputs, each a `Tensor` of shape
      `[batch_size, input_size]`, or a nested tuple of such elements.
    initial_state: (optional) An initial state for the RNN.
      If `cell.state_size` is an integer, this must be
      a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
      If `cell.state_size` is a tuple, this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    dtype: (optional) The data type for the initial state and expected output.
      Required if initial_state is not provided or RNN state has a heterogeneous
      dtype.
    sequence_length: Specifies the length of each sequence in inputs.
      An int32 or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`.
    scope: VariableScope for the created subgraph; defaults to "rnn".
    fg: (optional) Unused by this implementation.
  Returns:
    A pair (outputs, state) where:
    - outputs is a length T list of outputs (one for each input), or a nested
      tuple of such elements.
    - state is the final state
  Raises:
    TypeError: If `cell` is not an instance of RNNCell.
    ValueError: If `inputs` is `None` or an empty list, or if the input depth
      (column size) cannot be inferred from inputs via shape inference.
  """

  # Note: the standard RNNCell / input type checks are left disabled in this
  # variant.

  outputs = []
  # Create a new scope in which the caching device is either
  # determined by the parent scope, or is set to place the cached
  # Variable using the same placement as for the rest of the RNN.
  with vs.variable_scope(scope or "rnn") as varscope:
    if varscope.caching_device is None:
      varscope.set_caching_device(lambda op: op.device)

    # Obtain the first sequence of the input
    first_input = inputs
    while nest.is_sequence(first_input):
      first_input = first_input[0]

    # Temporarily avoid EmbeddingWrapper and seq2seq badness
    # TODO(lukaszkaiser): remove EmbeddingWrapper
    if first_input.get_shape().ndims != 1:

      input_shape = first_input.get_shape().with_rank_at_least(2)
      fixed_batch_size = input_shape[0]

      flat_inputs = nest.flatten(inputs)
      for flat_input in flat_inputs:
        input_shape = flat_input.get_shape().with_rank_at_least(2)
        batch_size, input_size = input_shape[0], input_shape[1:]
        fixed_batch_size.merge_with(batch_size)
        for i, size in enumerate(input_size):
          if size.value is None:
            raise ValueError(
                "Input size (dimension %d of inputs) must be accessible via "
                "shape inference, but saw value None." % i)
    else:
      fixed_batch_size = first_input.get_shape().with_rank_at_least(1)[0]

    if fixed_batch_size.value:
      batch_size = fixed_batch_size.value
    else:
      batch_size = array_ops.shape(first_input)[0]
    if initial_state is not None:
      state = initial_state
    else:
      if not dtype:
        raise ValueError("If no initial_state is provided, "
                         "dtype must be specified")
      state = cell.zero_state(batch_size, dtype)

    if sequence_length is not None:  # Prepare variables
      sequence_length = ops.convert_to_tensor(
          sequence_length, name="sequence_length")
      if sequence_length.get_shape().ndims not in (None, 1):
        raise ValueError(
            "sequence_length must be a vector of length batch_size")
      def _create_zero_output(output_size):
        # convert int to TensorShape if necessary
        size = _state_size_with_prefix(output_size, prefix=[batch_size])
        output = array_ops.zeros(
            array_ops.stack(size), _infer_state_dtype(dtype, state))
        shape = _state_size_with_prefix(
            output_size, prefix=[fixed_batch_size.value])
        output.set_shape(tensor_shape.TensorShape(shape))
        return output

      output_size = cell.output_size
      flat_output_size = nest.flatten(output_size)
      flat_zero_output = tuple(
          _create_zero_output(size) for size in flat_output_size)
      zero_output = nest.pack_sequence_as(structure=output_size,
                                          flat_sequence=flat_zero_output)

      sequence_length = math_ops.to_int32(sequence_length)
      min_sequence_length = math_ops.reduce_min(sequence_length)
      max_sequence_length = math_ops.reduce_max(sequence_length)

    for time, input_ in enumerate(inputs):
      if time > 0: varscope.reuse_variables()
      # pylint: disable=cell-var-from-loop
      call_cell = lambda: cell(input_, state)
      # pylint: enable=cell-var-from-loop
      if sequence_length is not None:
        (output, state) = _rnn_step(
            time=time,
            sequence_length=sequence_length,
            min_sequence_length=min_sequence_length,
            max_sequence_length=max_sequence_length,
            zero_output=zero_output,
            state=state,
            call_cell=call_cell,
            state_size=cell.state_size)
      else:
        (output, state) = call_cell()

      outputs.append(output)

    return (outputs, state)
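A hedged usage sketch of `static_rnn` (TF1 graph mode; shapes are illustrative):

```python
import tensorflow.compat.v1 as tf1
tf1.disable_eager_execution()

T, batch_size, input_size = 10, 8, 32
inputs = [tf1.placeholder(tf1.float32, [batch_size, input_size])
          for _ in range(T)]
cell = tf1.nn.rnn_cell.BasicLSTMCell(64)
outputs, final_state = static_rnn(cell, inputs, dtype=tf1.float32)
# `outputs` is a length-T list of [batch_size, 64] tensors.
```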
Example #41
def _dynamic_rnn_loop(
    cell, inputs, initial_state, parallel_iterations, swap_memory,
    sequence_length=None):
  """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, input_size].
    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
      `cell.state_size` is a tuple, then this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].

  Returns:
    Tuple `(final_outputs, final_state)`.
    final_outputs:
      A `Tensor` of shape `[time, batch_size, cell.output_size]`.
    final_state:
      A `Tensor` matrix, or tuple of such matrices, matching in length
      and shapes to `initial_state`.

  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
  state = initial_state
  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

  # Construct an initial output
  input_shape = array_ops.shape(inputs)
  time_steps = input_shape[0]
  batch_size = input_shape[1]

  inputs_got_shape = inputs.get_shape().with_rank_at_least(3).as_list()
  const_time_steps = inputs_got_shape[0]
  const_batch_size = inputs_got_shape[1]
  const_depth = inputs_got_shape[2:]

  if None in const_depth:
    raise ValueError(
        "Input size (depth of inputs) must be accessible via shape inference, "
        "but saw value None.")

  # Prepare dynamic conditional copying of state & output
  zeros_size = _state_size_with_prefix(cell.output_size, prefix=[batch_size])
  zero_output = array_ops.zeros(array_ops.pack(zeros_size), inputs.dtype)

  if sequence_length is not None:
    min_sequence_length = math_ops.reduce_min(sequence_length)
    max_sequence_length = math_ops.reduce_max(sequence_length)

  time = array_ops.constant(0, dtype=dtypes.int32, name="time")

  state_size = cell.state_size
  state_is_tuple = nest.is_sequence(state_size)

  state = nest.flatten(state) if state_is_tuple else (state,)

  with ops.op_scope([], "dynamic_rnn") as scope:
    base_name = scope

  output_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "output")

  input_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "input")

  input_ta = input_ta.unpack(inputs)

  def _time_step(time, output_ta_t, *state):
    """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      output_ta_t: `TensorArray`, the output with existing flow.
      *state: List of vector tensors.

    Returns:
      The tuple (time + 1, output_ta_t with updated flow) + new_state.
    """

    input_t = input_ta.read(time)
    # Restore some shape information
    input_t.set_shape([const_batch_size] + const_depth)

    # Pack state back up for use by cell
    state = (nest.pack_sequence_as(structure=state_size, flat_sequence=state)
             if state_is_tuple else state[0])

    call_cell = lambda: cell(input_t, state)

    if sequence_length is not None:
      (output, new_state) = _rnn_step(
          time=time,
          sequence_length=sequence_length,
          min_sequence_length=min_sequence_length,
          max_sequence_length=max_sequence_length,
          zero_output=zero_output,
          state=state,
          call_cell=call_cell,
          state_size=state_size,
          skip_conditionals=True)
    else:
      (output, new_state) = call_cell()

    # Pack state if using state tuples
    new_state = (
        tuple(nest.flatten(new_state)) if state_is_tuple else (new_state,))

    output_ta_t = output_ta_t.write(time, output)

    return (time + 1, output_ta_t) + new_state

  final_loop_vars = control_flow_ops.while_loop(
      cond=lambda time, *_: time < time_steps,
      body=_time_step,
      loop_vars=(time, output_ta) + tuple(state),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  (output_final_ta, final_state) = (final_loop_vars[1], final_loop_vars[2:])

  final_outputs = output_final_ta.pack()
  # Restore some shape information
  final_outputs_size = _state_size_with_prefix(
      cell.output_size, prefix=[const_time_steps, const_batch_size])
  final_outputs.set_shape(final_outputs_size)

  # Unpack final state if not using state tuples.
  final_state = (
      nest.pack_sequence_as(
          structure=cell.state_size, flat_sequence=final_state)
      if state_is_tuple else final_state[0])
  return (final_outputs, final_state)
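To make the control flow above easier to follow, here is a minimal, self-contained sketch of the TensorArray-plus-while_loop pattern that `_dynamic_rnn_loop` is built on. The doubling step stands in for `cell(input_t, state)`; everything here is illustrative, not the function's actual code.

```python
import tensorflow.compat.v1 as tf1

inputs = tf1.random.uniform([7, 4, 3])            # [time, batch, depth]
time_steps = tf1.shape(inputs)[0]
input_ta = tf1.TensorArray(tf1.float32, size=time_steps).unstack(inputs)
output_ta = tf1.TensorArray(tf1.float32, size=time_steps)

def step(time, out_ta):
    x_t = input_ta.read(time)                     # [batch, depth]
    y_t = 2.0 * x_t                               # stand-in for the cell call
    return time + 1, out_ta.write(time, y_t)

_, final_ta = tf1.while_loop(lambda t, _: t < time_steps, step,
                             (tf1.constant(0), output_ta))
outputs = final_ta.stack()                        # [time, batch, depth]
```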
Example #42
def map_fn(fn,
           elems,
           dtype=None,
           parallel_iterations=None,
           back_prop=True,
           swap_memory=False,
           infer_shape=True,
           name=None):
    """map on the list of tensors unpacked from `elems` on dimension 0.

  The simplest version of `map_fn` repeatedly applies the callable `fn` to a
  sequence of elements from first to last. The elements are made of the
  tensors unpacked from `elems`. `dtype` is the data type of the return
  value of `fn`. Users must provide `dtype` if it is different from
  the data type of `elems`.

  Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
  of the result tensor is `[values.shape[0]] + fn(values[0]).shape`.

  This method also allows multi-arity `elems` and output of `fn`.  If `elems`
  is a (possibly nested) list or tuple of tensors, then each of these tensors
  must have a matching first (unpack) dimension.  The signature of `fn` may
  match the structure of `elems`.  That is, if `elems` is
  `(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is:
  `fn = lambda (t1, [t2, t3, [t4, t5]]):`.

  Furthermore, `fn` may emit a different structure than its input.  For example,
  `fn` may look like: `fn = lambda t1: return (t1 + 1, t1 - 1)`.  In this case,
  the `dtype` parameter is not optional: `dtype` must be a type or (possibly
  nested) tuple of types matching the output of `fn`.

  To apply a functional operation to the nonzero elements of a SparseTensor
  one of the following methods is recommended. First, if the function is
  expressible as TensorFlow ops, use

  ```python
    result = SparseTensor(input.indices, fn(input.values), input.dense_shape)
  ```

  If, however, the function is not expressible as a TensorFlow op, then use

  ```python
  result = SparseTensor(
    input.indices, map_fn(fn, input.values), input.dense_shape)
  ```

  instead.

  When executing eagerly, map_fn does not execute in parallel even if
  `parallel_iterations` is set to a value > 1. You can still get the
  performance benefits of running a function in parallel by using the
  `tf.contrib.eager.defun` decorator,

  ```python
  # Assume the function being used in map_fn is fn.
  # To ensure map_fn calls fn in parallel, use the defun decorator.
  @tf.contrib.eager.defun
  def func(tensor):
    return tf.map_fn(fn, tensor)
  ```

  Note that if you use the defun decorator, any non-TensorFlow Python code
  that you may have written in your function won't get executed. See
  `tf.contrib.eager.defun` for more details. The recommendation would be to
  debug without defun but switch to defun to get performance benefits of
  running map_fn in parallel.

  Args:
    fn: The callable to be performed.  It accepts one argument, which will
      have the same (possibly nested) structure as `elems`.  Its output
      must have the same structure as `dtype` if one is provided, otherwise
      it must have the same structure as `elems`.
    elems: A tensor or (possibly nested) sequence of tensors, each of which
      will be unpacked along their first dimension.  The nested sequence
      of the resulting slices will be applied to `fn`.
    dtype: (optional) The output type(s) of `fn`.  If `fn` returns a structure
      of Tensors differing from the structure of `elems`, then `dtype` is not
      optional and must have the same structure as the output of `fn`.
    parallel_iterations: (optional) The number of iterations allowed to run
      in parallel. When graph building, the default value is 10. While executing
      eagerly, the default value is set to 1.
    back_prop: (optional) True enables support for back propagation.
    swap_memory: (optional) True enables GPU-CPU memory swapping.
    infer_shape: (optional) False disables tests for consistent output shapes.
    name: (optional) Name prefix for the returned tensors.

  Returns:
    A tensor or (possibly nested) sequence of tensors.  Each tensor packs the
    results of applying `fn` to tensors unpacked from `elems` along the first
    dimension, from first to last.

  Raises:
    TypeError: if `fn` is not callable or the structure of the output of
      `fn` and `dtype` do not match, or if elems is a SparseTensor.
    ValueError: if the lengths of the output of `fn` and `dtype` do not match.

  Examples:
    ```python
    elems = np.array([1, 2, 3, 4, 5, 6])
    squares = map_fn(lambda x: x * x, elems)
    # squares == [1, 4, 9, 16, 25, 36]
    ```

    ```python
    elems = (np.array([1, 2, 3]), np.array([-1, 1, -1]))
    alternate = map_fn(lambda x: x[0] * x[1], elems, dtype=tf.int64)
    # alternate == [-1, 2, -3]
    ```

    ```python
    elems = np.array([1, 2, 3])
    alternates = map_fn(lambda x: (x, -x), elems, dtype=(tf.int64, tf.int64))
    # alternates[0] == [1, 2, 3]
    # alternates[1] == [-1, -2, -3]
    ```
  """
    if not callable(fn):
        raise TypeError("fn must be callable.")

    if isinstance(elems, sparse_tensor.SparseTensor):
        raise TypeError(
            "To perform a map on the values of a sparse tensor use either "
            " SparseTensor(input.indices, fn(input.values), input.dense_shape) or "
            " SparseTensor(input.indices, map_fn(fn, input.values), "
            "input.dense_shape)")

    in_graph_mode = not context.executing_eagerly()
    # Set the default number of parallel_iterations depending on graph/eager mode.
    if in_graph_mode and not parallel_iterations:
        parallel_iterations = 10
    elif not in_graph_mode and not parallel_iterations:
        parallel_iterations = 1

    if not in_graph_mode and parallel_iterations > 1:
        logging.log_first_n(
            logging.WARN, "Setting parallel_iterations > 1 has no "
            "effect when executing eagerly. Consider calling map_fn"
            " with tf.contrib.eager.defun to execute fn in "
            "parallel.", 1)
        parallel_iterations = 1

    input_is_sequence = nest.is_sequence(elems)
    input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x]

    def input_pack(x):
        return nest.pack_sequence_as(elems, x) if input_is_sequence else x[0]

    if dtype is None:
        output_is_sequence = input_is_sequence
        output_flatten = input_flatten
        output_pack = input_pack
    else:
        output_is_sequence = nest.is_sequence(dtype)
        output_flatten = lambda x: nest.flatten(
            x) if output_is_sequence else [x]

        def output_pack(x):
            return (nest.pack_sequence_as(dtype, x)
                    if output_is_sequence else x[0])

    elems_flat = input_flatten(elems)

    with ops.name_scope(name, "map", elems_flat):
        # TODO(akshayka): Remove the in_graph_mode check once caching devices are
        # supported in Eager
        if in_graph_mode:
            # Any get_variable calls in fn will cache the first call locally
            # and not issue repeated network I/O requests for each iteration.
            varscope = vs.get_variable_scope()
            varscope_caching_device_was_none = False
            if varscope.caching_device is None:
                # TODO(ebrevdo): Change to using colocate_with here and in other
                # methods.
                varscope.set_caching_device(lambda op: op.device)
                varscope_caching_device_was_none = True

        elems_flat = [
            ops.convert_to_tensor(elem, name="elem") for elem in elems_flat
        ]

        dtype = dtype or input_pack([elem.dtype for elem in elems_flat])
        dtype_flat = output_flatten(dtype)

        # Convert elems to tensor array. n may be known statically.
        static_shape = elems_flat[0].shape
        if static_shape.ndims is not None and static_shape.ndims < 1:
            if len(elems_flat) == 1:
                raise ValueError(
                    "elems must be a 1+ dimensional Tensor, not a scalar")
            else:
                raise ValueError(
                    "elements in elems must be 1+ dimensional Tensors, not scalars"
                )
        n = (tensor_shape.dimension_value(static_shape[0])
             or array_ops.shape(elems_flat[0])[0])

        # TensorArrays are always flat
        elems_ta = [
            tensor_array_ops.TensorArray(dtype=elem.dtype,
                                         size=n,
                                         dynamic_size=False,
                                         infer_shape=True)
            for elem in elems_flat
        ]
        # Unpack elements
        elems_ta = [
            elem_ta.unstack(elem)
            for elem_ta, elem in zip(elems_ta, elems_flat)
        ]

        i = constant_op.constant(0)

        accs_ta = [
            tensor_array_ops.TensorArray(dtype=dt,
                                         size=n,
                                         dynamic_size=False,
                                         infer_shape=infer_shape)
            for dt in dtype_flat
        ]

        def compute(i, tas):
            """The loop body of map_fn.

      Args:
        i: the loop counter
        tas: the flat TensorArray accumulator list

      Returns:
        (i + 1, tas): the updated counter + updated TensorArrays

      Raises:
        TypeError: if dtype and packed_fn_values structure do not match
        ValueError: if dtype and packed_fn_values lengths do not match
      """
            packed_values = input_pack(
                [elem_ta.read(i) for elem_ta in elems_ta])
            packed_fn_values = fn(packed_values)
            nest.assert_same_structure(dtype or elems, packed_fn_values)
            flat_fn_values = output_flatten(packed_fn_values)
            tas = [
                ta.write(i, value) for (ta, value) in zip(tas, flat_fn_values)
            ]
            return (i + 1, tas)

        _, r_a = control_flow_ops.while_loop(
            lambda i, _: i < n,
            compute, (i, accs_ta),
            parallel_iterations=parallel_iterations,
            back_prop=back_prop,
            swap_memory=swap_memory,
            maximum_iterations=n)
        results_flat = [r.stack() for r in r_a]

        n_static = tensor_shape.Dimension(
            tensor_shape.dimension_value(
                elems_flat[0].get_shape().with_rank_at_least(1)[0]))
        for elem in elems_flat[1:]:
            n_static.merge_with(
                tensor_shape.Dimension(
                    tensor_shape.dimension_value(
                        elem.get_shape().with_rank_at_least(1)[0])))
        for r in results_flat:
            r.set_shape(
                tensor_shape.TensorShape(n_static).concatenate(
                    r.get_shape()[1:]))

        # TODO(akshayka): Remove the in_graph_mode check once caching devices are
        # supported in Eager
        if in_graph_mode and varscope_caching_device_was_none:
            varscope.set_caching_device(None)

        return output_pack(results_flat)
Example #43
def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
           swap_memory=False, infer_shape=True, name=None):
  """map on the list of tensors unpacked from `elems` on dimension 0.

  The simplest version of `map` repeatedly applies the callable `fn` to a
  sequence of elements from first to last. The elements are made of the
  tensors unpacked from `elems`. `dtype` is the data type of the return
  value of `fn`. Users must provide `dtype` if it is different from
  the data type of `elems`.

  Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
  of the result tensor is `[values.shape[0]] + fn(values[0]).shape`.

  This method also allows multi-arity `elems` and output of `fn`.  If `elems`
  is a (possibly nested) list or tuple of tensors, then each of these tensors
  must have a matching first (unpack) dimension.  The signature of `fn` may
  match the structure of `elems`.  That is, if `elems` is
  `(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is:
  `fn = lambda (t1, [t2, t3, [t4, t5]]):`.

  Furthermore, `fn` may emit a different structure than its input.  For example,
  `fn` may look like: `fn = lambda t1: return (t1 + 1, t1 - 1)`.  In this case,
  the `dtype` parameter is not optional: `dtype` must be a type or (possibly
  nested) tuple of types matching the output of `fn`.

  To apply a functional operation to the nonzero elements of a SparseTensor
  one of the following methods is recommended. First, if the function is
  expressible as TensorFlow ops, use

  ```python
    result = SparseTensor(input.indices, fn(input.values), input.dense_shape)
  ```

  If, however, the function is not expressible as a TensorFlow op, then use

  ```python
  result = SparseTensor(
    input.indices, map_fn(fn, input.values), input.dense_shape)
  ```

  instead.

  Args:
    fn: The callable to be performed.  It accepts one argument, which will
      have the same (possibly nested) structure as `elems`.  Its output
      must have the same structure as `dtype` if one is provided, otherwise
      it must have the same structure as `elems`.
    elems: A tensor or (possibly nested) sequence of tensors, each of which
      will be unpacked along their first dimension.  The nested sequence
      of the resulting slices will be applied to `fn`.
    dtype: (optional) The output type(s) of `fn`.  If `fn` returns a structure
      of Tensors differing from the structure of `elems`, then `dtype` is not
      optional and must have the same structure as the output of `fn`.
    parallel_iterations: (optional) The number of iterations allowed to run
      in parallel.
    back_prop: (optional) True enables support for back propagation.
    swap_memory: (optional) True enables GPU-CPU memory swapping.
    infer_shape: (optional) False disables tests for consistent output shapes.
    name: (optional) Name prefix for the returned tensors.

  Returns:
    A tensor or (possibly nested) sequence of tensors.  Each tensor packs the
    results of applying `fn` to tensors unpacked from `elems` along the first
    dimension, from first to last.

  Raises:
    TypeError: if `fn` is not callable or the structure of the output of
      `fn` and `dtype` do not match, or if elems is a SparseTensor.
    ValueError: if the lengths of the output of `fn` and `dtype` do not match.

  Examples:
    ```python
    elems = np.array([1, 2, 3, 4, 5, 6])
    squares = map_fn(lambda x: x * x, elems)
    # squares == [1, 4, 9, 16, 25, 36]
    ```

    ```python
    elems = (np.array([1, 2, 3]), np.array([-1, 1, -1]))
    alternate = map_fn(lambda x: x[0] * x[1], elems, dtype=tf.int64)
    # alternate == [-1, 2, -3]
    ```

    ```python
    elems = np.array([1, 2, 3])
    alternates = map_fn(lambda x: (x, -x), elems, dtype=(tf.int64, tf.int64))
    # alternates[0] == [1, 2, 3]
    # alternates[1] == [-1, -2, -3]
    ```
  """
  if not callable(fn):
    raise TypeError("fn must be callable.")

  if isinstance(elems, sparse_tensor.SparseTensor):
    raise TypeError(
        "To perform a map on the values of a sparse tensor use either "
        " SparseTensor(input.indices, fn(input.values), input.dense_shape) or "
        " SparseTensor(input.indices, map_fn(fn, input.values), "
        "input.dense_shape)")

  input_is_sequence = nest.is_sequence(elems)
  input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x]
  def input_pack(x):
    return nest.pack_sequence_as(elems, x) if input_is_sequence else x[0]

  if dtype is None:
    output_is_sequence = input_is_sequence
    output_flatten = input_flatten
    output_pack = input_pack
  else:
    output_is_sequence = nest.is_sequence(dtype)
    output_flatten = lambda x: nest.flatten(x) if output_is_sequence else [x]
    def output_pack(x):
      return (nest.pack_sequence_as(dtype, x)
              if output_is_sequence else x[0])

  elems_flat = input_flatten(elems)

  with ops.name_scope(name, "map", elems_flat):
    # Any get_variable calls in fn will cache the first call locally
    # and not issue repeated network I/O requests for each iteration.
    varscope = vs.get_variable_scope()
    varscope_caching_device_was_none = False
    if varscope.caching_device is None:
      # TODO(ebrevdo): Change to using colocate_with here and in other methods.
      varscope.set_caching_device(lambda op: op.device)
      varscope_caching_device_was_none = True

    elems_flat = [
        ops.convert_to_tensor(elem, name="elem") for elem in elems_flat]

    dtype = dtype or input_pack([elem.dtype for elem in elems_flat])
    dtype_flat = output_flatten(dtype)

    # Convert elems to tensor array.
    n = array_ops.shape(elems_flat[0])[0]

    # TensorArrays are always flat
    elems_ta = [
        tensor_array_ops.TensorArray(dtype=elem.dtype, size=n,
                                     dynamic_size=False,
                                     infer_shape=True)
        for elem in elems_flat]
    # Unpack elements
    elems_ta = [
        elem_ta.unpack(elem) for elem_ta, elem in zip(elems_ta, elems_flat)]

    i = constant_op.constant(0)

    accs_ta = [
        tensor_array_ops.TensorArray(dtype=dt, size=n,
                                     dynamic_size=False,
                                     infer_shape=infer_shape)
        for dt in dtype_flat]

    def compute(i, tas):
      """The loop body of map_fn.

      Args:
        i: the loop counter
        tas: the flat TensorArray accumulator list

      Returns:
        (i + 1, tas): the updated counter + updated TensorArrays

      Raises:
        TypeError: if dtype and packed_fn_values structure do not match
        ValueError: if dtype and packed_fn_values lengths do not match
      """
      packed_values = input_pack([elem_ta.read(i) for elem_ta in elems_ta])
      packed_fn_values = fn(packed_values)
      nest.assert_same_structure(dtype or elems, packed_fn_values)
      flat_fn_values = output_flatten(packed_fn_values)
      tas = [ta.write(i, value) for (ta, value) in zip(tas, flat_fn_values)]
      return (i + 1, tas)

    _, r_a = control_flow_ops.while_loop(
        lambda i, _: i < n, compute, (i, accs_ta),
        parallel_iterations=parallel_iterations,
        back_prop=back_prop,
        swap_memory=swap_memory)
    results_flat = [r.pack() for r in r_a]

    n_static = elems_flat[0].get_shape().with_rank_at_least(1)[0]
    for elem in elems_flat[1:]:
      n_static.merge_with(elem.get_shape().with_rank_at_least(1)[0])
    for r in results_flat:
      r.set_shape(tensor_shape.TensorShape(n_static).concatenate(
          r.get_shape()[1:]))

    if varscope_caching_device_was_none:
      varscope.set_caching_device(None)

    return output_pack(results_flat)
Example #44
    def __init__(self,
                 cell,
                 beam_size,
                 stop_token,
                 initial_state,
                 initial_input,
                 score_upper_bound=None,
                 max_len=100,
                 outputs_to_score_fn=None,
                 tokens_to_inputs_fn=None,
                 cell_transform='default',
                 scope=None):
        self.beam_size = beam_size
        self.stop_token = stop_token
        self.max_len = max_len
        self.scope = scope

        if score_upper_bound is None and outputs_to_score_fn is None:
            self.score_upper_bound = 0.0
        elif score_upper_bound is None or score_upper_bound > 3e38:
            # Note: 3e38 is just a little smaller than the largest float32
            # Second condition allows for Infinity as a synonym for None
            self.score_upper_bound = None
        else:
            self.score_upper_bound = float(score_upper_bound)

        if self.max_len is None and self.score_upper_bound is None:
            raise ValueError(
                "Beam search needs a stopping criterion. Please provide max_len or score_upper_bound."
            )

        if cell_transform == 'default':
            if type(cell) in [LSTMCell, BasicLSTMCell, MultiRNNCell]:
                cell_transform = 'flatten'
            else:
                cell_transform = 'replicate'

        if cell_transform == 'none':
            self.cell = cell
            self.initial_state = initial_state
            self.initial_input = initial_input
        elif cell_transform == 'flatten':
            self.cell = BeamFlattenWrapper(cell, self.beam_size)
            self.initial_state = self.cell.tile_along_beam(initial_state)
            self.initial_input = self.cell.tile_along_beam(initial_input)
        elif cell_transform == 'replicate':
            self.cell = BeamReplicateWrapper(cell, self.beam_size)
            self.initial_state = self.cell.tile_along_beam(initial_state)
            self.initial_input = self.cell.tile_along_beam(initial_input)
        else:
            raise ValueError(
                "cell_transform must be one of: 'default', 'flatten', 'replicate', 'none'"
            )

        self._cell_transform_used = cell_transform

        if outputs_to_score_fn is not None:
            self.outputs_to_score_fn = outputs_to_score_fn
        if tokens_to_inputs_fn is not None:
            self.tokens_to_inputs_fn = tokens_to_inputs_fn

        batch_size = tf.Dimension(None)
        if not nest.is_sequence(self.initial_state):
            batch_size = batch_size.merge_with(
                self.initial_state.get_shape()[0])
        else:
            for tensor in nest.flatten(self.initial_state):
                batch_size = batch_size.merge_with(tensor.get_shape()[0])

        if not nest.is_sequence(self.initial_input):
            batch_size = batch_size.merge_with(
                self.initial_input.get_shape()[0])
        else:
            for tensor in nest.flatten(self.initial_input):
                batch_size = batch_size.merge_with(tensor.get_shape()[0])

        self.inferred_batch_size = batch_size.value
        if self.inferred_batch_size is not None:
            self.batch_size = self.inferred_batch_size
        else:
            if not nest.is_sequence(self.initial_state):
                self.batch_size = tf.shape(self.initial_state)[0]
            else:
                self.batch_size = tf.shape(
                    list(nest.flatten(self.initial_state))[0])[0]

        self.inferred_batch_size_times_beam_size = None
        if self.inferred_batch_size is not None:
            self.inferred_batch_size_times_beam_size = self.inferred_batch_size * self.beam_size

        self.batch_size_times_beam_size = self.batch_size * self.beam_size
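The 'flatten' and 'replicate' transforms both rely on tiling each batch row `beam_size` times, so the wrapped cell processes `batch_size * beam_size` independent rows at once. A hedged sketch of that tiling idea (illustrative only; the real wrappers also handle nested states):

```python
import tensorflow.compat.v1 as tf1

def tile_along_beam_sketch(tensor, beam_size):
    # [batch, d] -> [batch * beam_size, d], repeating each row beam_size times.
    d = tensor.shape[-1].value
    tiled = tf1.tile(tf1.expand_dims(tensor, 1), [1, beam_size, 1])
    return tf1.reshape(tiled, [-1, d])
```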
Example #45
def ensure_square(shape):
    if not nest.is_sequence(shape):
        shape = (shape, shape)

    return shape
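Usage is straightforward: scalars are promoted to a pair, while sequences pass through unchanged.

```python
assert ensure_square(3) == (3, 3)
assert ensure_square((2, 4)) == (2, 4)
```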
Example #46
def scan(fn,
         elems,
         initializer=None,
         parallel_iterations=10,
         back_prop=True,
         swap_memory=False,
         infer_shape=True,
         reverse=False,
         name=None):
    """scan on the list of tensors unpacked from `elems` on dimension 0.

  The simplest version of `scan` repeatedly applies the callable `fn` to a
  sequence of elements from first to last. The elements are made of the tensors
  unpacked from `elems` on dimension 0. The callable fn takes two tensors as
  arguments. The first argument is the accumulated value computed from the
  preceding invocation of fn. If `initializer` is None, `elems` must contain
  at least one element, and its first element is used as the initializer.

  Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
  of the result tensor is `[len(values)] + fn(initializer, values[0]).shape`.
  If reverse=True, it's fn(initializer, values[-1]).shape.

  This method also allows multi-arity `elems` and accumulator.  If `elems`
  is a (possibly nested) list or tuple of tensors, then each of these tensors
  must have a matching first (unpack) dimension.  The second argument of
  `fn` must match the structure of `elems`.

  If no `initializer` is provided, the output structure and dtypes of `fn`
  are assumed to be the same as its input; and in this case, the first
  argument of `fn` must match the structure of `elems`.

  If an `initializer` is provided, then the output of `fn` must have the same
  structure as `initializer`; and the first argument of `fn` must match
  this structure.

  For example, if `elems` is `(t1, [t2, t3])` and `initializer` is
  `[i1, i2]` then an appropriate signature for `fn` in `python2` is:
  `fn = lambda (acc_p1, acc_p2), (t1, [t2, t3]):` and `fn` must return a list,
  `[acc_n1, acc_n2]`.  An alternative correct signature for `fn`, and the
   one that works in `python3`, is:
  `fn = lambda a, t:`, where `a` and `t` correspond to the input tuples.

  Args:
    fn: The callable to be performed.  It accepts two arguments.  The first will
      have the same structure as `initializer` if one is provided, otherwise it
      will have the same structure as `elems`.  The second will have the same
      (possibly nested) structure as `elems`.  Its output must have the same
      structure as `initializer` if one is provided, otherwise it must have the
      same structure as `elems`.
    elems: A tensor or (possibly nested) sequence of tensors, each of which will
      be unpacked along their first dimension.  The nested sequence of the
      resulting slices will be the first argument to `fn`.
    initializer: (optional) A tensor or (possibly nested) sequence of tensors,
      initial value for the accumulator, and the expected output type of `fn`.
    parallel_iterations: (optional) The number of iterations allowed to run in
      parallel.
    back_prop: (optional) True enables support for back propagation.
    swap_memory: (optional) True enables GPU-CPU memory swapping.
    infer_shape: (optional) False disables tests for consistent output shapes.
    reverse: (optional) True scans the tensor last to first (instead of first to
      last).
    name: (optional) Name prefix for the returned tensors.

  Returns:
    A tensor or (possibly nested) sequence of tensors.  Each tensor packs the
    results of applying `fn` to tensors unpacked from `elems` along the first
    dimension, and the previous accumulator value(s), from first to last (or
    last to first, if `reverse=True`).

  Raises:
    TypeError: if `fn` is not callable or the structure of the output of
      `fn` and `initializer` do not match.
    ValueError: if the lengths of the output of `fn` and `initializer`
      do not match.

  Examples:
    ```python
    elems = np.array([1, 2, 3, 4, 5, 6])
    sum = scan(lambda a, x: a + x, elems)
    # sum == [1, 3, 6, 10, 15, 21]
    sum = scan(lambda a, x: a + x, elems, reverse=True)
    # sum == [21, 20, 18, 15, 11, 6]
    ```

    ```python
    elems = np.array([1, 2, 3, 4, 5, 6])
    initializer = np.array(0)
    sum_one = scan(
        lambda a, x: x[0] - x[1] + a, (elems + 1, elems), initializer)
    # sum_one == [1, 2, 3, 4, 5, 6]
    ```

    ```python
    elems = np.array([1, 0, 0, 0, 0, 0])
    initializer = (np.array(0), np.array(1))
    fibonaccis = scan(lambda a, _: (a[1], a[0] + a[1]), elems, initializer)
    # fibonaccis == ([1, 1, 2, 3, 5, 8], [1, 2, 3, 5, 8, 13])
    ```
  """
    if not callable(fn):
        raise TypeError("fn must be callable.")

    input_is_sequence = nest.is_sequence(elems)
    input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x]

    def input_pack(x):
        return nest.pack_sequence_as(elems, x) if input_is_sequence else x[0]

    if initializer is None:
        output_is_sequence = input_is_sequence
        output_flatten = input_flatten
        output_pack = input_pack
    else:
        output_is_sequence = nest.is_sequence(initializer)
        output_flatten = lambda x: nest.flatten(
            x) if output_is_sequence else [x]

        def output_pack(x):
            return (nest.pack_sequence_as(initializer, x)
                    if output_is_sequence else x[0])

    elems_flat = input_flatten(elems)

    in_graph_mode = not context.executing_eagerly()
    with ops.name_scope(name, "scan", elems_flat):
        # TODO(akshayka): Remove the in_graph_mode check once caching devices are
        # supported in Eager
        if in_graph_mode:
            # Any get_variable calls in fn will cache the first call locally
            # and not issue repeated network I/O requests for each iteration.
            varscope = vs.get_variable_scope()
            varscope_caching_device_was_none = False
            if varscope.caching_device is None:
                # TODO(ebrevdo): Change to using colocate_with here and in other
                # methods.
                varscope.set_caching_device(lambda op: op.device)
                varscope_caching_device_was_none = True

        # Convert elems to tensors.
        elems_flat = [
            ops.convert_to_tensor(elem, name="elem") for elem in elems_flat
        ]

        # n may be known statically.
        n = tensor_shape.dimension_value(elems_flat[0].shape[0])
        if n is None:
            n = array_ops.shape(elems_flat[0])[0]

        # TensorArrays are always flat
        elems_ta = [
            tensor_array_ops.TensorArray(dtype=elem.dtype,
                                         size=n,
                                         dynamic_size=False,
                                         element_shape=elem.shape[1:],
                                         infer_shape=True)
            for elem in elems_flat
        ]
        # Unpack elements
        elems_ta = [
            elem_ta.unstack(elem)
            for elem_ta, elem in zip(elems_ta, elems_flat)
        ]

        if initializer is None:
            a_flat = [elem.read(n - 1 if reverse else 0) for elem in elems_ta]
            i = 1
        else:
            initializer_flat = output_flatten(initializer)
            a_flat = [ops.convert_to_tensor(init) for init in initializer_flat]
            i = 0

        # Create a tensor array to store the intermediate values.
        accs_ta = [
            tensor_array_ops.TensorArray(
                dtype=init.dtype,
                size=n,
                element_shape=init.shape if infer_shape else None,
                dynamic_size=False,
                infer_shape=infer_shape) for init in a_flat
        ]

        if initializer is None:
            accs_ta = [
                acc_ta.write(n - 1 if reverse else 0, a)
                for (acc_ta, a) in zip(accs_ta, a_flat)
            ]

        def compute(i, a_flat, tas):
            """The loop body of scan.

      Args:
        i: the loop counter.
        a_flat: the accumulator value(s), flattened.
        tas: the output accumulator TensorArray(s), flattened.

      Returns:
        [i + 1, a_flat, tas]: the updated counter + new accumulator values +
          updated TensorArrays

      Raises:
        TypeError: if initializer and fn() output structure do not match
        ValueType: if initializer and fn() output lengths do not match
      """
            packed_elems = input_pack(
                [elem_ta.read(i) for elem_ta in elems_ta])
            packed_a = output_pack(a_flat)
            a_out = fn(packed_a, packed_elems)
            nest.assert_same_structure(
                elems if initializer is None else initializer, a_out)
            flat_a_out = output_flatten(a_out)
            tas = [ta.write(i, value) for (ta, value) in zip(tas, flat_a_out)]
            if reverse:
                next_i = i - 1
            else:
                next_i = i + 1
            return (next_i, flat_a_out, tas)

        if reverse:
            initial_i = n - 1 - i
            condition = lambda i, _1, _2: i >= 0
        else:
            initial_i = i
            condition = lambda i, _1, _2: i < n
        _, _, r_a = control_flow_ops.while_loop(
            condition,
            compute, (initial_i, a_flat, accs_ta),
            parallel_iterations=parallel_iterations,
            back_prop=back_prop,
            swap_memory=swap_memory,
            maximum_iterations=n)

        results_flat = [r.stack() for r in r_a]

        n_static = tensor_shape.Dimension(
            tensor_shape.dimension_value(
                elems_flat[0].get_shape().with_rank_at_least(1)[0]))
        for elem in elems_flat[1:]:
            n_static.merge_with(
                tensor_shape.Dimension(
                    tensor_shape.dimension_value(
                        elem.get_shape().with_rank_at_least(1)[0])))
        for r in results_flat:
            r.set_shape(
                tensor_shape.TensorShape(n_static).concatenate(
                    r.get_shape()[1:]))

        # TODO(akshayka): Remove the in_graph_mode check once caching devices are
        # supported in Eager
        if in_graph_mode and varscope_caching_device_was_none:
            varscope.set_caching_device(None)

        return output_pack(results_flat)
Example #47
0
    def __init__(self,
                 args,
                 output_size,
                 build_bias,
                 bias_initializer=None,
                 kernel_initializer=None):
        self._build_bias = build_bias

        if args is None or (nest.is_sequence(args) and not args):
            raise ValueError("`args` must be specified")
        if not nest.is_sequence(args):
            args = [args]
            self._is_sequence = False
        else:
            self._is_sequence = True

        # Calculate the total size of arguments on dimension 1.
        total_arg_size = 0
        shapes = [a.get_shape() for a in args]
        for shape in shapes:
            if shape.ndims != 2:
                raise ValueError("linear is expecting 2D arguments: %s" %
                                 shapes)
            if shape[1].value is None:
                raise ValueError(
                    "linear expects shape[1] to be provided for shape %s, "
                    "but saw %s" % (shape, shape[1]))
            else:
                total_arg_size += shape[1].value

        dtype = [a.dtype for a in args][0]

        scope = vs.get_variable_scope()
        with vs.variable_scope(scope) as outer_scope:
            self._weights = vs.get_variable(_WEIGHTS_VARIABLE_NAME,
                                            [total_arg_size, output_size],
                                            dtype=dtype,
                                            initializer=kernel_initializer)
            if build_bias:
                with vs.variable_scope(outer_scope) as inner_scope:
                    inner_scope.set_partitioner(None)
                    if bias_initializer is None:
                        bias_initializer = init_ops.constant_initializer(
                            0.0, dtype=dtype)
                    self._biases = vs.get_variable(
                        _BIAS_VARIABLE_NAME, [output_size],
                        dtype=dtype,
                        initializer=bias_initializer)
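
This `__init__` only builds the variables; a class like this is usually paired with a `__call__` that applies them. A minimal sketch under that assumption, reusing the `math_ops`/`array_ops`/`nn_ops` aliases from the neighboring examples (the method below is illustrative, not part of the snippet above):

```python
    def __call__(self, args):
        # Mirror __init__: wrap a lone tensor in a one-element list.
        if not self._is_sequence:
            args = [args]
        if len(args) == 1:
            res = math_ops.matmul(args[0], self._weights)
        else:
            # Concatenate on dimension 1 to match total_arg_size above.
            res = math_ops.matmul(array_ops.concat(args, 1), self._weights)
        if self._build_bias:
            res = nn_ops.bias_add(res, self._biases)
        return res
```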
Example #48
0
    def __call__(self, cell_inputs, state, scope=None):
        (
            past_beam_symbols,      # [batch_size*self.beam_size, :], right-aligned!!!
            past_beam_logprobs,     # [batch_size*self.beam_size]
            past_cell_states        # LSTM: ([batch_size*self.beam_size, :, dim],
                                    #        [batch_size*self.beam_size, :, dim])
                                    # GRU: [batch_size*self.beam_size, :, dim]
        ) = state

        past_cell_state = self.get_last_cell_state(past_cell_states)
        # The copynet and attention cases both return
        # (output, state, alignments, attns); fold them into one branch.
        if (self.use_copy and self.copy_fun == 'copynet') or self.use_attention:
            cell_output, cell_state, alignments, attns = \
                self.cell(cell_inputs, past_cell_state, scope)
        else:
            cell_output, cell_state = \
                self.cell(cell_inputs, past_cell_state, scope)

        # [batch_size*beam_size, num_classes]
        if self.use_copy and self.copy_fun == 'copynet':
            logprobs = tf.math.log(cell_output)
        else:
            W, b = self.output_project
            if self.locally_normalized:
                logprobs = tf.nn.log_softmax(tf.matmul(cell_output, W) + b)
            else:
                logprobs = tf.matmul(cell_output, W) + b
        num_classes = logprobs.get_shape()[1]

        # stop_mask: indicates partial sequences ending with a stop token
        # [batch_size * beam_size]
        # x     0
        # _STOP 1
        # x     0
        # x     0
        input_symbols = past_beam_symbols[:, -1]
        stop_mask = tf.expand_dims(tf.cast(
            tf.equal(input_symbols, self.stop_token), tf.float32), 1)

        # done_mask: indicates stop token in the output vocabulary
        # [1, num_classes]
        # [- - _STOP - - -]
        # [0 0 1 0 0 0]
        done_mask = tf.cast(tf.reshape(tf.equal(tf.range(num_classes),
                                                self.stop_token),
                                       [1, num_classes]),
                            tf.float32)
        # set the next token distribution of partial sequences ending with
        # a stop token to:
        # [- - _STOP - - -]
        # [-inf -inf 0 -inf -inf -inf]
        logprobs = tf.add(logprobs, tf.multiply(
            stop_mask, -1e18 * (tf.ones_like(done_mask) - done_mask)))
        logprobs = tf.multiply(logprobs, (1 - tf.multiply(stop_mask, done_mask)))

        # length normalization
        past_logprobs_unnormalized = \
            tf.multiply(past_beam_logprobs, tf.pow(self.seq_len, self.alpha))
        logprobs_unnormalized = \
            tf.expand_dims(past_logprobs_unnormalized, 1) + logprobs
        seq_len = tf.expand_dims(self.seq_len, 1) + (1 - stop_mask)
        logprobs_batched = tf.compat.v1.div(logprobs_unnormalized,
                                            tf.pow(seq_len, self.alpha))

        beam_logprobs, indices = tf.nn.top_k(
            tf.reshape(logprobs_batched, [-1, self.beam_size * num_classes]),
            self.beam_size
        )
        beam_logprobs = tf.reshape(beam_logprobs, [-1])

        # For continuing to the next symbols
        parent_refs_offsets = \
            (tf.range(self.full_size) // self.beam_size) * self.beam_size
        symbols = indices % num_classes  # [batch_size, self.beam_size]
        parent_refs = tf.reshape(indices // num_classes,
                                 [-1])  # [batch_size*self.beam_size]
        parent_refs = parent_refs + parent_refs_offsets

        beam_symbols = tf.concat(axis=1, values=[tf.gather(past_beam_symbols, parent_refs),
                                                 tf.reshape(symbols, [-1, 1])])
        self.seq_len = tf.squeeze(tf.gather(seq_len, parent_refs), axis=[1])

        if self.use_attention:
            ranked_alignments = nest_map(
                lambda element: tf.gather(element, parent_refs), alignments)
            ranked_attns = nest_map(
                lambda element: tf.gather(element, parent_refs), attns)

        # update cell_states
        def concat_and_gather_tuple_states(pc_states, c_state):
            rc_states = (
                tf.concat(axis=1, values=[pc_states[0], tf.expand_dims(c_state[0], 1)]),
                tf.concat(axis=1, values=[pc_states[1], tf.expand_dims(c_state[1], 1)])
            )
            c_states = (
                nest_map(lambda element: tf.gather(element, parent_refs), rc_states[0]),
                nest_map(lambda element: tf.gather(element, parent_refs), rc_states[1])
            )
            return c_states

        if nest.is_sequence(cell_state):
            if self.num_layers > 1:
                ranked_cell_states = [concat_and_gather_tuple_states(pc_states, c_state)
                    for pc_states, c_state in zip(past_cell_states, cell_state)]
            else:
                ranked_cell_states = concat_and_gather_tuple_states(
                    past_cell_states, cell_state)
        else:
            ranked_cell_states = tf.gather(
                tf.concat(axis=1, values=[past_cell_states, tf.expand_dims(cell_state, 1)]),
                parent_refs)

        compound_cell_state = (
            beam_symbols,
            beam_logprobs,
            ranked_cell_states
        )
        ranked_cell_output = tf.gather(cell_output, parent_refs)

        if (self.use_copy and self.copy_fun == 'copynet') or self.use_attention:
            return ranked_cell_output, compound_cell_state, ranked_alignments, \
                   ranked_attns
        else:
            return ranked_cell_output, compound_cell_state
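
The `parent_refs` arithmetic above is the easiest part to misread: `tf.nn.top_k` indexes into each row of the reshaped `[batch_size, beam_size * num_classes]` matrix, so `indices // num_classes` is a beam index within one batch row, and `parent_refs_offsets` shifts it into the flattened `batch_size * beam_size` layout. A worked trace with hypothetical sizes:

```python
# beam_size = 3, num_classes = 5, full_size = batch_size * beam_size = 6.
# parent_refs_offsets = (range(6) // 3) * 3 = [0, 0, 0, 3, 3, 3]
# Suppose top_k returns flat indices [[7, 2, 14], [0, 9, 6]]:
#   symbols          = indices %  5 -> [[2, 2, 4], [0, 4, 1]]
#   per-row beam idx = indices // 5 -> [[1, 0, 2], [0, 1, 1]]
# After the reshape and adding the offsets:
#   parent_refs = [1, 0, 2, 3, 4, 4]
# i.e. the rows to gather from every flattened [batch_size*beam_size, ...]
# tensor (beam symbols, sequence lengths, cell states).
```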
Example #49
0
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, can be large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size]) for e in encoder_outputs
    ]
    # attention_states = array_ops.concat(top_states, 1)
    attention_states = array_ops.concat(1, top_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          cell,
          num_decoder_symbols,
          embedding_size,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
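
The docstring's warning matters in practice: without `output_projection`, the cell is wrapped in an `OutputProjectionWrapper` and the attention vectors scale with `num_decoder_symbols`. A hedged sketch of supplying a projection instead (all tensor and variable names here are illustrative, and `cell`, `encoder_inputs`, etc. are assumed to be defined as in the docstring):

```python
import tensorflow as tf

# Projection variables sized as the docstring describes:
# W: [output_size x num_decoder_symbols], B: [num_decoder_symbols].
w = tf.get_variable("proj_w", [cell.output_size, num_decoder_symbols])
b = tf.get_variable("proj_b", [num_decoder_symbols])

outputs, state = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols, num_decoder_symbols, embedding_size,
    output_projection=(w, b),
    feed_previous=True)

# With output_projection supplied, outputs stay cell-sized; project only
# where logits are actually needed.
logits = [tf.matmul(o, w) + b for o in outputs]
```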
Example #50
0
def one2many_rnn_seq2seq(encoder_inputs,
                         decoder_inputs_dict,
                         cell,
                         num_encoder_symbols,
                         num_decoder_symbols_dict,
                         embedding_size,
                         feed_previous=False,
                         dtype=None,
                         scope=None):
    """One-to-many RNN sequence-to-sequence model (multi-task).

  This is a multi-task sequence-to-sequence model with one encoder and multiple
  decoders. Reference to multi-task sequence-to-sequence learning can be found
  here: http://arxiv.org/abs/1511.06114

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs_dict: A dictionany mapping decoder name (string) to
      the corresponding decoder_inputs; each decoder_inputs is a list of 1D
      Tensors of shape [batch_size]; num_decoders is defined as
      len(decoder_inputs_dict).
    cell: core_rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols_dict: A dictionary mapping decoder name (string) to an
      integer specifying number of symbols for the corresponding decoder;
      len(num_decoder_symbols_dict) must be equal to num_decoders.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first of
      decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and encoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "one2many_rnn_seq2seq"

  Returns:
    A tuple of the form (outputs_dict, state_dict), where:
      outputs_dict: A mapping from decoder name (string) to a list of the same
        length as decoder_inputs_dict[name]; each element in the list is a 2D
        Tensors with shape [batch_size x num_decoder_symbol_list[name]]
        containing the generated outputs.
      state_dict: A mapping from decoder name (string) to the final state of the
        corresponding decoder RNN; it is a 2D Tensor of shape
        [batch_size x cell.state_size].
  """
    outputs_dict = {}
    state_dict = {}

    with variable_scope.variable_scope(scope or "one2many_rnn_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype

        # Encoder.
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = core_rnn.static_rnn(encoder_cell,
                                               encoder_inputs,
                                               dtype=dtype)

        # Decoder.
        for name, decoder_inputs in decoder_inputs_dict.items():
            num_decoder_symbols = num_decoder_symbols_dict[name]

            with variable_scope.variable_scope("one2many_decoder_" +
                                               str(name)) as scope:
                decoder_cell = core_rnn_cell.OutputProjectionWrapper(
                    cell, num_decoder_symbols)
                if isinstance(feed_previous, bool):
                    outputs, state = embedding_rnn_decoder(
                        decoder_inputs,
                        encoder_state,
                        decoder_cell,
                        num_decoder_symbols,
                        embedding_size,
                        feed_previous=feed_previous)
                else:
                    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
                    def filled_embedding_rnn_decoder(feed_previous):
                        """The current decoder with a fixed feed_previous parameter."""
                        # pylint: disable=cell-var-from-loop
                        reuse = None if feed_previous else True
                        vs = variable_scope.get_variable_scope()
                        with variable_scope.variable_scope(vs, reuse=reuse):
                            outputs, state = embedding_rnn_decoder(
                                decoder_inputs,
                                encoder_state,
                                decoder_cell,
                                num_decoder_symbols,
                                embedding_size,
                                feed_previous=feed_previous)
                        # pylint: enable=cell-var-from-loop
                        state_list = [state]
                        if nest.is_sequence(state):
                            state_list = nest.flatten(state)
                        return outputs + state_list

                    outputs_and_state = control_flow_ops.cond(
                        feed_previous,
                        lambda: filled_embedding_rnn_decoder(True),
                        lambda: filled_embedding_rnn_decoder(False))
                    # Outputs length is the same as for decoder inputs.
                    outputs_len = len(decoder_inputs)
                    outputs = outputs_and_state[:outputs_len]
                    state_list = outputs_and_state[outputs_len:]
                    state = state_list[0]
                    if nest.is_sequence(encoder_state):
                        state = nest.pack_sequence_as(structure=encoder_state,
                                                      flat_sequence=state_list)
            outputs_dict[name] = outputs
            state_dict[name] = state

    return outputs_dict, state_dict
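
A hypothetical two-task invocation, to show how the dict-keyed API lines up (the task names, vocabulary sizes, and placeholder shapes below are made up):

```python
import tensorflow as tf

batch_size, steps = 32, 10
encoder_inputs = [tf.placeholder(tf.int32, [batch_size])
                  for _ in range(steps)]
decoder_inputs_dict = {
    "translate": [tf.placeholder(tf.int32, [batch_size])
                  for _ in range(steps)],
    "parse": [tf.placeholder(tf.int32, [batch_size])
              for _ in range(steps)],
}
num_decoder_symbols_dict = {"translate": 20000, "parse": 128}
cell = tf.nn.rnn_cell.GRUCell(256)

outputs_dict, state_dict = one2many_rnn_seq2seq(
    encoder_inputs, decoder_inputs_dict, cell,
    num_encoder_symbols=30000,
    num_decoder_symbols_dict=num_decoder_symbols_dict,
    embedding_size=128,
    feed_previous=True)
# outputs_dict["translate"][t] has shape [batch_size, 20000].
```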
Example #51
0
def dynamic_seq2seq(encoder_inputs,
                    decoder_inputs,
                    query_inputs,
                    cell_encoder_fw,
                    cell_encoder_bw,
                    distraction_cell,
                    num_encoder_symbols,
                    num_decoder_symbols,
                    embedding_size,
                    initial_embedding=None,
                    num_heads=1,
                    embedding_trainable=False,
                    output_projection=None,
                    feed_previous=False,
                    dtype=None,
                    scope=None,
                    initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "dynamic_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    """encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    """
    if initial_embedding is not None:
      embedding = variable_scope.get_variable(
          'embedding', initializer=initial_embedding,
          trainable=embedding_trainable)
    else:
      embedding = variable_scope.get_variable(
          'embedding', [num_encoder_symbols, embedding_size],
          trainable=embedding_trainable)


    embedded_inputs = embedding_ops.embedding_lookup(embedding, encoder_inputs)
    embedded_inputs = array_ops.unpack(embedded_inputs)

    query_embeddings = embedding_ops.embedding_lookup(embedding, query_inputs)
    query_embeddings = array_ops.unpack(query_embeddings)

    print("Embedded Inputs length:", len(embedded_inputs))
    print("Shape in embedded inputs:", embedded_inputs[0].get_shape())

    with variable_scope.variable_scope("Encoder_Cell"):
      encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.bidirectional_rnn(
          cell_encoder_fw, cell_encoder_bw, embedded_inputs, dtype=dtype)

    with variable_scope.variable_scope("Query_Cell"):

      query_outputs, query_state_fw, query_state_bw = rnn.bidirectional_rnn(
        cell_encoder_fw, cell_encoder_bw, query_embeddings, dtype = dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    encoder_state = array_ops.concat(1, [encoder_state_fw, encoder_state_bw])
    query_state = array_ops.concat(1, [query_state_fw, query_state_bw])

    top_states_encoder = [
        array_ops.reshape(e, [-1, 1, 2 * cell_encoder_fw.output_size])
        for e in encoder_outputs]
    attention_states_encoder = array_ops.concat(1, top_states_encoder)

    top_states_query = [
        array_ops.reshape(e, [-1, 1, 2 * cell_encoder_fw.output_size])
        for e in query_outputs]
    attention_states_query = array_ops.concat(1, top_states_query)


    # Decoder.
    output_size = None
    if output_projection is None:
      cell_encoder_fw = rnn_cell.OutputProjectionWrapper(
          cell_encoder_fw, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      # Keyword names aligned with the decoder() call below; the original
      # passed `attention_state` and omitted `distraction_cell`.
      return dynamic_decoder_wrapper(
          decoder_inputs,
          initial_state=encoder_state,
          attention_states=attention_states_encoder,
          attention_states_query=attention_states_query,
          cell_encoder=cell_encoder_fw,
          num_symbols=num_decoder_symbols,
          embedding_size=embedding_size,
          distract_initial_state=encoder_state,
          distraction_cell=distraction_cell,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          embedding_scope=scope,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state, query_weights, doc_weights = dynamic_decoder_wrapper(
            decoder_inputs,
            initial_state=encoder_state,
            attention_states=attention_states_encoder,
            attention_states_query=attention_states_query,
            cell_encoder=cell_encoder_fw,
            num_symbols=num_decoder_symbols,
            embedding_size=embedding_size,
            distract_initial_state=encoder_state,
            distraction_cell=distraction_cell,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            embedding_scope=scope,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)

      print(len(outputs), len(state_list), len(query_weights), len(doc_weights))
      return outputs + state_list + query_weights + doc_weights

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    query_len = len(decoder_inputs)
    doc_len = len(decoder_inputs)
    state_list = outputs_and_state[outputs_len:-(query_len + doc_len)]
    m = len(state_list)
    print(outputs_len, query_len, doc_len, m, len(outputs_and_state))
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return (outputs_and_state[:outputs_len],
            state,
            outputs_and_state[outputs_len + m:outputs_len + m + query_len],
            outputs_and_state[outputs_len + m + query_len:])
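
The return slicing above is dense; a worked layout with hypothetical lengths (3 decoder steps and a flat state of m = 2 tensors) may help:

```python
# outputs_len = query_len = doc_len = 3, m = 2, so outputs_and_state is the
# 11-element list [o0, o1, o2, s0, s1, q0, q1, q2, d0, d1, d2]:
# outputs_and_state[:3]              -> outputs      [o0, o1, o2]
# outputs_and_state[3:-(3 + 3)]      -> state_list   [s0, s1]
# outputs_and_state[3 + 2:3 + 2 + 3] -> query weights [q0, q1, q2]
# outputs_and_state[3 + 2 + 3:]      -> doc weights   [d0, d1, d2]
```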
Example #52
0
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                mc_search=False):

  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    
    # Changed from: encoder_cell = tf.contrib.rnn.EmbeddingWrapper(
    #     cell, embedding_classes=num_encoder_symbols,
    #     embedding_size=embedding_size)
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)

    # Changed from: encoder_outputs, encoder_state = tf.contrib.rnn.static_rnn(
    #     encoder_cell, encoder_inputs, dtype=dtype)
    encoder_outputs, encoder_state = tf.nn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    print(top_states)
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      outputs, state = embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention,
          mc_search=mc_search,
          scope=scope)
      return outputs, state, encoder_state

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention,
            mc_search=mc_search,
            scope=scope)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state, encoder_state
Example #53
0
def _linear(args, output_size, bias, scope=None, use_fp16=False):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    Args:
      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
      output_size: int, second dimension of W[i].
      bias: boolean, whether to add a bias term or not.
      scope: VariableScope for the created subgraph; defaults to "Linear".
      use_fp16: boolean; if True, the variables are created via
        _variable_on_cpu in float16 and the inputs are cast to float16.

    Returns:
      A 2D Tensor with shape [batch x output_size] equal to
      sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

    Raises:
      ValueError: if some of the arguments has unspecified or wrong shape.
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2D arguments: %s" %
                             str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" %
                             str(shapes))
        else:
            total_arg_size += shape[1]

    dtype = [a.dtype for a in args][0]

    # Now the computation.
    with tf.variable_scope(scope or "Linear"):
        matrix = _variable_on_cpu('Matrix', [total_arg_size, output_size],
                                  use_fp16=use_fp16)
        if use_fp16:
            dtype = tf.float16
        else:
            dtype = tf.float32
        args = [tf.cast(x, dtype) for x in args]
        if len(args) == 1:
            # Matrix has shape [total_arg_size, output_size], so no transpose
            # is needed for [batch, total_arg_size] inputs.
            res = tf.matmul(args[0], matrix)
        else:
            res = tf.matmul(tf.concat(args, 1), matrix)
        if not bias:
            return res
        bias_term = _variable_on_cpu('Bias', [output_size],
                                     tf.constant_initializer(0),
                                     use_fp16=use_fp16)
    return res + bias_term
Example #54
0
def _as_tuple(value):
    if not nest.is_sequence(value):
        return value
    return tuple([_as_tuple(v) for v in value])
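
A quick trace of the recursion with hypothetical values (TF's `nest.is_sequence` treats lists and tuples as sequences, but not scalars or strings):

```python
# _as_tuple(5)            -> 5            (scalar passed through)
# _as_tuple([1, [2, 3]])  -> (1, (2, 3))  (every nested level becomes a tuple)
# _as_tuple(((4,), [5]))  -> ((4,), (5,))
```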
Example #55
0
def state_saving_rnn(cell, inputs, state_saver, state_name,
                     sequence_length=None, scope=None):
  """RNN that accepts a state saver for time-truncated RNN calculation.

  Args:
    cell: An instance of `RNNCell`.
    inputs: A length T list of inputs, each a tensor of shape
      `[batch_size, input_size]`.
    state_saver: A state saver object with methods `state` and `save_state`.
    state_name: Python string or tuple of strings.  The name to use with the
      state_saver. If the cell returns tuples of states (i.e.,
      `cell.state_size` is a tuple) then `state_name` should be a tuple of
      strings having the same length as `cell.state_size`.  Otherwise it should
      be a single string.
    sequence_length: (optional) An int32/int64 vector size [batch_size].
      See the documentation for rnn() for more details about sequence_length.
    scope: VariableScope for the created subgraph; defaults to "RNN".

  Returns:
    A pair (outputs, state) where:
      outputs is a length T list of outputs (one for each input)
      states is the final state

  Raises:
    TypeError: If `cell` is not an instance of RNNCell.
    ValueError: If `inputs` is `None` or an empty list, or if the arity and
     type of `state_name` does not match that of `cell.state_size`.
  """
  state_size = cell.state_size
  state_is_tuple = nest.is_sequence(state_size)
  state_name_tuple = nest.is_sequence(state_name)

  if state_is_tuple != state_name_tuple:
    raise ValueError(
        "state_name should be the same type as cell.state_size.  "
        "state_name: %s, cell.state_size: %s"
        % (str(state_name), str(state_size)))

  if state_is_tuple:
    state_name_flat = nest.flatten(state_name)
    state_size_flat = nest.flatten(state_size)

    if len(state_name_flat) != len(state_size_flat):
      raise ValueError("#elems(state_name) != #elems(state_size): %d vs. %d"
                       % (len(state_name_flat), len(state_size_flat)))

    initial_state = nest.pack_sequence_as(
        structure=state_name,
        flat_sequence=[state_saver.state(n) for n in state_name_flat])
  else:
    initial_state = state_saver.state(state_name)

  (outputs, state) = rnn(cell, inputs, initial_state=initial_state,
                         sequence_length=sequence_length, scope=scope)

  if state_is_tuple:
    state_flat = nest.flatten(state)
    save_state = [
        state_saver.save_state(n, s)
        for (n, s) in zip(state_name_flat, state_flat)]
  else:
    save_state = [state_saver.save_state(state_name, state)]

  with ops.control_dependencies(save_state):
    outputs[-1] = array_ops.identity(outputs[-1])

  return (outputs, state)
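
The `state_saver` only needs to expose the `state(name)` / `save_state(name, value)` protocol the docstring describes. A toy, in-memory sketch of that protocol (illustrative only; a real pipeline would use a batching state saver rather than plain variables):

```python
import tensorflow as tf

class DictStateSaver(object):
    """Toy state saver: keeps one variable per state name."""

    def __init__(self, states):
        self._states = states  # dict: state_name -> tf.Variable

    def state(self, name):
        # Read the saved state for this name.
        return self._states[name]

    def save_state(self, name, value):
        # Return an op; state_saving_rnn wires it in as a control dependency.
        return tf.assign(self._states[name], value)
```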
Example #56
0
def _linear(args,
            output_size,
            bias,
            bias_initializer=None,
            kernel_initializer=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
  Args:
    args: a 2D Tensor or a list of 2D, batch x n, Tensors.
    output_size: int, second dimension of W[i].
    bias: boolean, whether to add a bias term or not.
    bias_initializer: starting value to initialize the bias
      (default is all zeros).
    kernel_initializer: starting value to initialize the weight.
  Returns:
    A 2D Tensor with shape [batch x output_size] equal to
    sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
  Raises:
    ValueError: if some of the arguments has unspecified or wrong shape.
  """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape() for a in args]
    for shape in shapes:
        if shape.ndims != 2:
            raise ValueError("linear is expecting 2D arguments: %s" % shapes)
        if shape[1].value is None:
            raise ValueError(
                "linear expects shape[1] to be provided for shape %s, "
                "but saw %s" % (shape, shape[1]))
        else:
            total_arg_size += shape[1].value

    dtype = [a.dtype for a in args][0]

    # Now the computation.
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope) as outer_scope:
        weights = vs.get_variable(_WEIGHTS_VARIABLE_NAME,
                                  [total_arg_size, output_size],
                                  dtype=dtype,
                                  initializer=kernel_initializer)
        if len(args) == 1:
            res = math_ops.matmul(args[0], weights)
        else:
            res = math_ops.matmul(array_ops.concat(args, 1), weights)
        if not bias:
            return res
        with vs.variable_scope(outer_scope) as inner_scope:
            inner_scope.set_partitioner(None)
            if bias_initializer is None:
                bias_initializer = init_ops.constant_initializer(0.0,
                                                                 dtype=dtype)
            biases = vs.get_variable(_BIAS_VARIABLE_NAME, [output_size],
                                     dtype=dtype,
                                     initializer=bias_initializer)
        return nn_ops.bias_add(res, biases)
Example #57
0
def _maxlens(x, d=0):
    n = len(x)
    ret = [(n, d)]
    if n > 0 and nest.is_sequence(x[0]):
        ret.extend(map(lambda y: _maxlens(y, d + 1), x))
    return nest.flatten(ret)
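
To make the bookkeeping concrete, a hypothetical trace (each entry records a length and the depth at which it was seen, and everything is flattened at the end):

```python
# _maxlens([[1, 2], [3]])
#   -> ret = [(2, 0)]                  # two children at depth 0
#      + _maxlens([1, 2], 1) = [2, 1]  # length 2 at depth 1
#      + _maxlens([3], 1)    = [1, 1]  # length 1 at depth 1
#   -> nest.flatten(...) == [2, 0, 2, 1, 1, 1]
```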
Example #58
0
def _rnn_step(
    time, sequence_length, min_sequence_length, max_sequence_length,
    zero_output, state, call_cell, state_size, skip_conditionals=False):
  """Calculate one step of a dynamic RNN minibatch.

  Returns an (output, state) pair conditioned on the sequence_lengths.
  When skip_conditionals=False, the pseudocode is something like:

  if t >= max_sequence_length:
    return (zero_output, state)
  if t < min_sequence_length:
    return call_cell()

  # Selectively output zeros or output, old state or new state depending
  # on if we've finished calculating each row.
  new_output, new_state = call_cell()
  final_output = np.vstack([
    zero_output if time >= sequence_lengths[r] else new_output_r
    for r, new_output_r in enumerate(new_output)
  ])
  final_state = np.vstack([
    state[r] if time >= sequence_lengths[r] else new_state_r
    for r, new_state_r in enumerate(new_state)
  ])
  return (final_output, final_state)

  Args:
    time: Python int, the current time step
    sequence_length: int32 `Tensor` vector of size [batch_size]
    min_sequence_length: int32 `Tensor` scalar, min of sequence_length
    max_sequence_length: int32 `Tensor` scalar, max of sequence_length
    zero_output: `Tensor` vector of shape [output_size]
    state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`,
      or a list/tuple of such tensors.
    call_cell: lambda returning tuple of (new_output, new_state) where
      new_output is a `Tensor` matrix of shape `[batch_size, output_size]`.
      new_state is a `Tensor` matrix of shape `[batch_size, state_size]`.
    state_size: The `cell.state_size` associated with the state.
    skip_conditionals: Python bool, whether to skip using the conditional
      calculations.  This is useful for `dynamic_rnn`, where the input tensor
      matches `max_sequence_length`, and using conditionals just slows
      everything down.

  Returns:
    A tuple of (`final_output`, `final_state`) as given by the pseudocode above:
      final_output is a `Tensor` matrix of shape [batch_size, output_size]
      final_state is either a single `Tensor` matrix, or a tuple of such
        matrices (matching length and shapes of input `state`).

  Raises:
    ValueError: If the cell returns a state tuple whose length does not match
      that returned by `state_size`.
  """

  state_is_tuple = nest.is_sequence(state)
  # Convert state to a list for ease of use
  state = list(nest.flatten(state)) if state_is_tuple else [state]
  state_shape = [s.get_shape() for s in state]

  def _copy_some_through(new_output, new_state):
    # Use broadcasting select to determine which values should get
    # the previous state & zero output, and which values should get
    # a calculated state & output.
    copy_cond = (time >= sequence_length)
    return ([math_ops.select(copy_cond, zero_output, new_output)]
            + [math_ops.select(copy_cond, old_s, new_s)
               for (old_s, new_s) in zip(state, new_state)])

  def _maybe_copy_some_through():
    """Run RNN step.  Pass through either no or some past state."""
    new_output, new_state = call_cell()
    new_state = (
        list(nest.flatten(new_state)) if state_is_tuple else [new_state])

    if len(state) != len(new_state):
      raise ValueError(
          "Input and output state tuple lengths do not match: %d vs. %d"
          % (len(state), len(new_state)))

    return control_flow_ops.cond(
        # if t < min_seq_len: calculate and return everything
        time < min_sequence_length, lambda: [new_output] + new_state,
        # else copy some of it through
        lambda: _copy_some_through(new_output, new_state))

  # TODO(ebrevdo): skipping these conditionals may cause a slowdown,
  # but benefits from removing cond() and its gradient.  We should
  # profile with and without this switch here.
  if skip_conditionals:
    # Instead of using conditionals, perform the selective copy at all time
    # steps.  This is faster when max_seq_len is equal to the number of unrolls
    # (which is typical for dynamic_rnn).
    new_output, new_state = call_cell()
    new_state = (
        list(nest.flatten(new_state)) if state_is_tuple else [new_state])

    if len(state) != len(new_state):
      raise ValueError(
          "Input and output state tuple lengths do not match: %d vs. %d"
          % (len(state), len(new_state)))

    final_output_and_state = _copy_some_through(new_output, new_state)
  else:
    empty_update = lambda: [zero_output] + list(state)

    final_output_and_state = control_flow_ops.cond(
        # if t >= max_seq_len: copy all state through, output zeros
        time >= max_sequence_length, empty_update,
        # otherwise calculation is required: copy some or all of it through
        _maybe_copy_some_through)

  (final_output, final_state) = (
      final_output_and_state[0], final_output_and_state[1:])

  final_output.set_shape(zero_output.get_shape())
  for final_state_i, state_shape_i in zip(final_state, state_shape):
    final_state_i.set_shape(state_shape_i)

  if state_is_tuple:
    return (
        final_output,
        nest.pack_sequence_as(structure=state_size, flat_sequence=final_state))
  else:
    return (final_output, final_state[0])
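
The heart of `_copy_some_through` is a broadcast select per batch row; the same rule in plain numpy, with hypothetical values (`math_ops.select` follows the same `(cond, then, else)` order as `np.where`):

```python
import numpy as np

time = 2
sequence_length = np.array([1, 3, 2])       # per-row sequence lengths
copy_cond = time >= sequence_length         # [True, False, True]

new_output = np.array([[1.], [2.], [3.]])   # freshly computed outputs
zero_output = np.zeros_like(new_output)

# Rows whose sequence already ended emit zeros and keep their old state.
final_output = np.where(copy_cond[:, None], zero_output, new_output)
# final_output == [[0.], [2.], [0.]]
```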
Example #59
0
    def _line_sep(self,
                  args,
                  output_size,
                  bias,
                  bias_initializer=None,
                  kernel_initializer=None):
        if args is None or (nest.is_sequence(args) and not args):
            raise ValueError("`args` must be specified")
        if not nest.is_sequence(args):
            args = [args]

        # Calculate the total size of arguments on dimension 1.
        total_arg_size = 0
        shapes = [a.get_shape() for a in args]
        for shape in shapes:
            if shape.ndims != 2:
                raise ValueError("linear is expecting 2D arguments: %s" %
                                 shapes)
            if shape[1].value is None:
                raise ValueError(
                    "linear expects shape[1] to be provided for shape %s, "
                    "but saw %s" % (shape, shape[1]))
            else:
                total_arg_size += shape[1].value

        dtype = [a.dtype for a in args][0]

        # Now the computation.
        scope = vs.get_variable_scope()
        with vs.variable_scope(scope) as outer_scope:

            # Expect exactly two inputs: the current input x and the previous
            # hidden state h.
            [x, h] = args

            x_size = x.get_shape().as_list()[1]
            W_xh = tf.get_variable('W_xh', [x_size, output_size],
                                   initializer=kernel_initializer)
            W_hh = tf.get_variable('W_hh', [int(output_size / 4), output_size],
                                   initializer=kernel_initializer)

            #x = tf.Print(x,[tf.reduce_mean(x)], str(scope)+'x: ')
            #h = tf.Print(h,[tf.reduce_mean(h)], str(scope)+'h: ')

            #W_xh = tf.Print(W_xh,[tf.reduce_mean(W_xh)], str(scope)+'W_xh: ')
            #W_hh = tf.Print(W_hh,[tf.reduce_mean(W_hh)], str(scope)+'W_hh: ')

            cn_xh = self.cosine_norm(x, W_xh, 'cn_xh')  # one hot vector
            cn_hh = self.cosine_norm(h, W_hh, 'cn_hh')

            #cn_xh = tf.Print(cn_xh,[tf.reduce_mean(cn_xh)], str(scope)+'cn_xh: ')
            #cn_hh = tf.Print(cn_hh,[tf.reduce_mean(cn_hh)], str(scope)+'cn_hh: ')

            res = cn_xh + cn_hh

            if not bias:
                return res
            with vs.variable_scope(outer_scope) as inner_scope:
                inner_scope.set_partitioner(None)
                if bias_initializer is None:
                    bias_initializer = init_ops.constant_initializer(
                        0.0, dtype=dtype)
                biases = vs.get_variable(_BIAS_VARIABLE_NAME, [output_size],
                                         dtype=dtype,
                                         initializer=bias_initializer)
            return nn_ops.bias_add(res, biases)
Example #60
0
def BiRNNModel(cell_fw, cell_bw, inputs, initial_state_fw=None,
               initial_state_bw=None, dtype=None, sequence_length=None,
               num_cell_layers=None, scope=None):
  """Creates a bidirectional recurrent neural network.

  Similar to the unidirectional case above (rnn) but takes input and builds
  independent forward and backward RNNs with the final forward and backward
  outputs depth-concatenated, such that the output will have the format
  [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of
  forward and backward cell must match. The initial state for both directions
  is zero by default (but can be set optionally) and no intermediate states are
  ever returned -- the network is fully unrolled for the given (passed in)
  length(s) of the sequence(s) or completely unrolled if length(s) is not given.

  Args:
    cell_fw: An instance of RNNCell, to be used for forward direction.
    cell_bw: An instance of RNNCell, to be used for backward direction.
    inputs: A length T list of inputs, each a tensor of shape
      [batch_size, input_size].
    initial_state_fw: (optional) An initial state for the forward RNN.
      This must be a tensor of appropriate type and shape
      `[batch_size x cell_fw.state_size]`.
      If `cell_fw.state_size` is a tuple, this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
    initial_state_bw: (optional) Same as for `initial_state_fw`, but using
      the corresponding properties of `cell_bw`.
    dtype: (optional) The data type for the initial state.  Required if
      either of the initial states are not provided.
    sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
      containing the actual lengths for each of the sequences.
    num_cell_layers: Num of layers of the RNN cell. (Mainly used for generating
      output state representations for multi-layer RNN cells.)
    scope: VariableScope for the created subgraph; defaults to "BiRNN"

  Returns:
    A tuple (outputs, output_states) where:
      outputs is a length `T` list of outputs (one for each input), which
        are depth-concatenated forward and backward outputs.
      output_states is a length `T` list of hidden states (one for each step),
        which are depth-concatenated forward and backward states.

  Raises:
    TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
    ValueError: If inputs is None or an empty list.
  """

  if not isinstance(cell_fw, tf.nn.rnn_cell.RNNCell):
    raise TypeError("cell_fw must be an instance of RNNCell")
  if not isinstance(cell_bw, tf.nn.rnn_cell.RNNCell):
    raise TypeError("cell_bw must be an instance of RNNCell")
  if not isinstance(inputs, list):
    raise TypeError("inputs must be a list")
  if not inputs:
    raise ValueError("inputs must not be empty")

  name = scope or "BiRNN"
  # Forward direction
  with tf.variable_scope(name + "_FW") as fw_scope:
    output_fw, states_fw = RNNModel(cell_fw, inputs, initial_state_fw,
      dtype, sequence_length, num_cell_layers=num_cell_layers, scope=fw_scope)

  # Backward direction
  with tf.variable_scope(name + "_BW") as bw_scope:
    tmp, tmp_states = RNNModel(cell_bw, _reverse_seq(inputs, sequence_length),
      initial_state_bw, dtype, sequence_length,
      num_cell_layers=num_cell_layers, scope=bw_scope)
  output_bw = _reverse_seq(tmp, sequence_length)
  states_bw = _reverse_seq(tmp_states, sequence_length)

  # Concat each of the forward/backward outputs
  outputs = [tf.concat(axis=1, values=[fw, bw]) for fw, bw in zip(output_fw, output_bw)]

  # Notice that the computation of the encoder final state uses the final state
  # of the backward RNN without reverse!!!
  if nest.is_sequence(cell_fw.state_size):
    output_states = [nest_map_dual(lambda x, y: tf.concat(axis=1, values=[x, y]), fw, bw)
                     for fw, bw in zip(states_fw, tmp_states)]
  else:
    if num_cell_layers > 1:
      output_states = []
      for fw, bw in zip(states_fw, tmp_states):
        output_states.append(tf.concat(axis=1, values=[tf.concat(axis=1, values=[l_fw, l_bw])
          for l_fw, l_bw in zip(tf.split(axis=1, num_or_size_splits=num_cell_layers, value=fw),
            tf.split(axis=1, num_or_size_splits=num_cell_layers, value=bw))]))
    else:
      output_states = [tf.concat(axis=1, values=[fw, bw])
                       for fw, bw in zip(states_fw, tmp_states)]

  return (outputs, output_states)
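
A hypothetical invocation (the GRU sizes and single-layer setting are made up, and `RNNModel`/`_reverse_seq` are assumed to be defined alongside this function):

```python
import tensorflow as tf

# T = 10 steps, batch of 32, 64-dim inputs, 128-unit cells per direction.
inputs = [tf.placeholder(tf.float32, [32, 64]) for _ in range(10)]
cell_fw = tf.nn.rnn_cell.GRUCell(128)
cell_bw = tf.nn.rnn_cell.GRUCell(128)

outputs, output_states = BiRNNModel(cell_fw, cell_bw, inputs,
                                    dtype=tf.float32, num_cell_layers=1)
# outputs[t] and output_states[t] each have shape [32, 256]:
# the forward and backward results depth-concatenated on axis 1.
```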