def call(self, inputs, state):
        sigmoid = math_ops.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

        # get context from encoder outputs
        context = self._simple_attention(self._encoder_vector,
                                         self._encoder_proj, h)

        if self._linear is None:
            self._linear = _Linear([inputs, context, h], 4 * self._num_units,
                                   True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(
            value=self._linear([inputs, context, h]),
            num_or_size_splits=4,
            axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
                 self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
Example #2
def get_model_params(variable_prefix, split_lstm_matrices=True):
  if variable_prefix:
    exclude = [ variable_prefix+"/Variable", variable_prefix+"/Variable_1" ]
    tmp = { v.op.name: v.eval() for v in tf.global_variables() if (v.op.name.startswith(variable_prefix) and v.op.name not in exclude) }
  else:
    exclude = [ "Variable", "Variable_1" ]
    tmp = { v.op.name: v.eval() for v in tf.global_variables() if v.op.name not in exclude }
  # Rename keys
  params = {name.replace("/", "-"): param for name, param in tmp.items()}
  if split_lstm_matrices:
    for name in list(params.keys()):  # copy the keys: params is mutated below
      if "LSTMCell" in name:
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        if "Matrix" in name:
          i, j, f, o = array_ops.split(1, 4, params[name])
        elif "Bias" in name:
          i, j, f, o = array_ops.split(0, 4, params[name])
        else:
          logging.error("Unknown tensor type.")
          exit(1)
        name_i = name.replace("LSTMCell", "LSTMCell-i")
        name_j = name.replace("LSTMCell", "LSTMCell-j")
        name_f = name.replace("LSTMCell", "LSTMCell-f")
        name_o = name.replace("LSTMCell", "LSTMCell-o")
        params[name_i] = i.eval()
        params[name_j] = j.eval()
        params[name_f] = f.eval()
        params[name_o] = o.eval()
        del params[name]
      elif "AttnV" in name:
        params[name] = array_ops.reshape(params[name], [ params[name].shape[0], 1 ]).eval()
      elif "AttnW" in name:
        # remove dims of size 1
        params[name] = tf.squeeze(params[name]).eval()
  return params
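The splits above rely on the fused LSTM layout: the four gate blocks sit side by side in the kernel, so the matrix is split along axis 1 (columns) and the bias along axis 0. A minimal NumPy sketch of the same split, with hypothetical sizes (the real shapes come from the checkpoint):

import numpy as np

input_size, num_units = 3, 5
matrix = np.random.rand(input_size + num_units, 4 * num_units)
bias = np.random.rand(4 * num_units)

# Gates are packed side by side: columns [0:n] are i, [n:2n] are j, etc.
m_i, m_j, m_f, m_o = np.split(matrix, 4, axis=1)
b_i, b_j, b_f, b_o = np.split(bias, 4, axis=0)
assert m_i.shape == (input_size + num_units, num_units)
assert b_i.shape == (num_units,)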
def _ragged_split(tensor, pieces):
  """Like split for 1D tensors but allows case where len % pieces != 0.

  Args:
    tensor: `tf.Tensor` that must be 1D.
    pieces: a positive integer specifying the number of pieces into which
      tensor should be split.

  Returns:
    list of `tf.Tensor` of length pieces, which hold the values of
      the input tensor, in order.  The final tensor may be longer
      than the others, which will all be of equal length.

  Raises:
    ValueError: input tensor must be 1D.
  """
  shape = tensor.shape
  if 1 != len(shape):
    raise ValueError("input tensor must be 1D")
  tensor_len = shape.dims[0].value
  chunk_size = tensor_len // pieces
  with ops.colocate_with(tensor):
    if tensor_len != (pieces * chunk_size):
      # last piece will be short
      assert pieces > 1
      last_chunk_size = tensor_len - ((pieces - 1) * chunk_size)
      assert last_chunk_size > 0
      piece_lens = [chunk_size for _ in range(pieces - 1)] + [last_chunk_size]
      return array_ops.split(tensor, piece_lens)
    else:
      return array_ops.split(tensor, pieces)
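A worked example of the piece-length arithmetic above, in plain NumPy (no TF session needed): a length-10 vector split into 3 pieces yields lengths [3, 3, 4], with the final piece absorbing the remainder.

import numpy as np

t = np.arange(10)
pieces = 3
chunk = len(t) // pieces                                         # 3
lens = [chunk] * (pieces - 1) + [len(t) - (pieces - 1) * chunk]  # [3, 3, 4]
parts = np.split(t, np.cumsum(lens)[:-1])
assert [len(p) for p in parts] == [3, 3, 4]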
def _split_batch(features, labels, number_of_shards, device):
  """Split input features and labes into batches."""

  def split_dictionary(dictionary):
    """Split a dictionary into shards."""
    shards = [{} for _ in range(number_of_shards)]
    for name, tensor in six.iteritems(dictionary):
      if isinstance(tensor, sparse_tensor.SparseTensor):
        for i, shard in enumerate(
            sparse_ops.sparse_split(
                sp_input=tensor, num_split=number_of_shards, axis=0)):
          shards[i][name] = shard
      else:
        for i, shard in enumerate(array_ops.split(tensor, number_of_shards)):
          shards[i][name] = shard
    return shards

  with ops_lib.name_scope('split_inputs'):
    with ops_lib.device(device):
      if isinstance(features, dict):
        feature_shards = split_dictionary(features)
      else:
        feature_shards = array_ops.split(features, number_of_shards)

      if labels is None:
        label_shards = None
      elif isinstance(labels, dict):
        label_shards = split_dictionary(labels)
      else:
        label_shards = array_ops.split(labels, number_of_shards)
  return feature_shards, label_shards
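For illustration, a hedged sketch of calling the helper on a dense feature dict in TF 1.x graph mode (tensor shapes, shard count, and device string are hypothetical):

import tensorflow as tf

features = {'x': tf.ones([8, 4])}
labels = tf.zeros([8])
feature_shards, label_shards = _split_batch(
    features, labels, number_of_shards=2, device='/cpu:0')
# feature_shards: list of 2 dicts, each with an 'x' of shape [4, 4]
# label_shards: list of 2 tensors of shape [4]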
  def testZerosCacheDoesntLeakAcrossModes(self):
    with ops.Graph().as_default():
      t = random_ops.random_normal(shape=[100, 2])
      x = random_ops.random_normal(shape=[100, 4])
      dy = random_ops.random_normal(shape=[100, 4])
      with backprop.GradientTape() as gradient_tape:
        gradient_tape.watch(x)
        x1, _ = array_ops.split(x, num_or_size_splits=2, axis=1)
        y1 = x1 ** 2.
        y = array_ops.concat([y1, t], axis=1)

      dx = gradient_tape.gradient(y, x, output_gradients=dy)
      with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        sess.run(dx)

    t = random_ops.random_normal(shape=[100, 2])
    x = random_ops.random_normal(shape=[100, 4])
    dy = random_ops.random_normal(shape=[100, 4])
    with backprop.GradientTape() as gradient_tape:
      gradient_tape.watch(x)
      x1, _ = array_ops.split(x, num_or_size_splits=2, axis=1)
      y1 = x1 ** 2.
      y = array_ops.concat([y1, t], axis=1)

    dx = gradient_tape.gradient(y, x, output_gradients=dy)
Example #6
  def testSplit(self):
    for dtype in self.numeric_types:
      for axis in [0, -3]:
        self._testBinary(
            lambda x, y: array_ops.split(value=y, num_or_size_splits=3, axis=x),
            np.int32(axis),
            np.array([[[1], [2]], [[3], [4]], [[5], [6]]],
                     dtype=dtype),
            expected=[
                np.array([[[1], [2]]], dtype=dtype),
                np.array([[[3], [4]]], dtype=dtype),
                np.array([[[5], [6]]], dtype=dtype),
            ],
            equality_test=self.ListsAreClose)

      for axis in [1, -2]:
        self._testBinary(
            lambda x, y: array_ops.split(value=y, num_or_size_splits=2, axis=x),
            np.int32(axis),
            np.array([[[1], [2]], [[3], [4]], [[5], [6]]],
                     dtype=dtype),
            expected=[
                np.array([[[1]], [[3]], [[5]]], dtype=dtype),
                np.array([[[2]], [[4]], [[6]]], dtype=dtype),
            ],
            equality_test=self.ListsAreClose)
Example #7
def cluster_feature_analysis(sess, user_ids):
    # Get trained parameters
    lstm_vars = [v for v in tf.all_variables() if v.name.startswith('lstm')]
    matrix_var = sess.run(lstm_vars[0])
    bias_var = sess.run(lstm_vars[1])
    
    # Split the gates
    matrix_i, matrix_j, matrix_f, matrix_o = sess.run(array_ops.split(1, 4, matrix_var))
    bias_i, bias_j, bias_f, bias_o = sess.run(array_ops.split(0, 4, bias_var))
    
    dict_i, dict_j, dict_f, dict_o = dict(), dict(), dict(), dict()
    for feature in range(len(config.feature_desc)):
        dict_i[feature] = []
        dict_j[feature] = []
        dict_f[feature] = []
        dict_o[feature] = []
    for user_id in user_ids:
        print(user_id)
        gates_i, gates_j, gates_f, gates_o = feature_importance(sess, user_id, matrix_i, 
                                                                matrix_j, matrix_f, matrix_o, 
                                                                bias_i, bias_j, bias_f, bias_o)
        for feature in range(len(config.feature_desc)):
            dict_i[feature].append(gates_i[feature])
            dict_j[feature].append(gates_j[feature])
            dict_f[feature].append(gates_f[feature])
            dict_o[feature].append(gates_o[feature])                        
    return dict_i, dict_j, dict_f, dict_o
Example #8
    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM)."""
        with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
            # Parameters of gates are concatenated into one multiply for efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = array_ops.split(1, 2, state)
            concat = _linear([inputs, h], 4 * self._num_units, True, 0.,
                             self.weights_init, self.trainable, self.restore,
                             self.reuse)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(1, 4, concat)

            new_c = (c * self._inner_activation(f + self._forget_bias) +
                     self._inner_activation(i) *
                     self._activation(j))
            new_h = self._activation(new_c) * self._inner_activation(o)

            if self._state_is_tuple:
                new_state = _rnn_cell.LSTMStateTuple(new_c, new_h)
            else:
                new_state = array_ops.concat(1, [new_c, new_h])

            # Retrieve RNN Variables
            with tf.variable_scope('Linear', reuse=True):
                self.W = tf.get_variable('Matrix')
                self.b = tf.get_variable('Bias')

            return new_h, new_state
  def call(self, inputs, states, training=None):
    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    # dropout matrices for input units
    dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
    # dropout matrices for recurrent units
    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(
        h_tm1, training, count=4)

    if 0 < self.dropout < 1.:
      inputs_i = inputs * dp_mask[0]
      inputs_f = inputs * dp_mask[1]
      inputs_c = inputs * dp_mask[2]
      inputs_o = inputs * dp_mask[3]
    else:
      inputs_i = inputs
      inputs_f = inputs
      inputs_c = inputs
      inputs_o = inputs

    if 0 < self.recurrent_dropout < 1.:
      h_tm1_i = h_tm1 * rec_dp_mask[0]
      h_tm1_f = h_tm1 * rec_dp_mask[1]
      h_tm1_c = h_tm1 * rec_dp_mask[2]
      h_tm1_o = h_tm1 * rec_dp_mask[3]
    else:
      h_tm1_i = h_tm1
      h_tm1_f = h_tm1
      h_tm1_c = h_tm1
      h_tm1_o = h_tm1

    (kernel_i, kernel_f,
     kernel_c, kernel_o) = array_ops.split(self.kernel, 4, axis=3)
    (recurrent_kernel_i,
     recurrent_kernel_f,
     recurrent_kernel_c,
     recurrent_kernel_o) = array_ops.split(self.recurrent_kernel, 4, axis=3)

    if self.use_bias:
      bias_i, bias_f, bias_c, bias_o = array_ops.split(self.bias, 4)
    else:
      bias_i, bias_f, bias_c, bias_o = None, None, None, None

    x_i = self.input_conv(inputs_i, kernel_i, bias_i, padding=self.padding)
    x_f = self.input_conv(inputs_f, kernel_f, bias_f, padding=self.padding)
    x_c = self.input_conv(inputs_c, kernel_c, bias_c, padding=self.padding)
    x_o = self.input_conv(inputs_o, kernel_o, bias_o, padding=self.padding)
    h_i = self.recurrent_conv(h_tm1_i, recurrent_kernel_i)
    h_f = self.recurrent_conv(h_tm1_f, recurrent_kernel_f)
    h_c = self.recurrent_conv(h_tm1_c, recurrent_kernel_c)
    h_o = self.recurrent_conv(h_tm1_o, recurrent_kernel_o)

    i = self.recurrent_activation(x_i + h_i)
    f = self.recurrent_activation(x_f + h_f)
    c = f * c_tm1 + i * self.activation(x_c + h_c)
    o = self.recurrent_activation(x_o + h_o)
    h = o * self.activation(c)
    return h, [h, c]
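Note the axis=3 in the kernel splits above: a ConvLSTM2D kernel is stored as [kernel_h, kernel_w, in_channels, 4 * filters], so the four gate kernels sit side by side along the last axis. A quick NumPy check with hypothetical sizes:

import numpy as np

kh, kw, in_ch, filters = 3, 3, 8, 16
kernel = np.random.randn(kh, kw, in_ch, 4 * filters)
k_i, k_f, k_c, k_o = np.split(kernel, 4, axis=3)
assert k_i.shape == (kh, kw, in_ch, filters)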
Example #10
 def _tf_to_cudnn_biases(self, *tf_biases):
   r"""Reverse the operations in StitchBiases()."""
   # b_ir is the summed bias of reset and update gate.
   b_ir, b_wh, b_rh = tf_biases
   bi, br = b_ir * 0.5, b_ir * 0.5
   b_wi, b_wr = array_ops.split(bi, 2, axis=0)
   b_ri, b_rr = array_ops.split(br, 2, axis=0)
   return b_wi, b_wr, b_wh, b_ri, b_rr, b_rh
  def testVariableShapeFunction(self):
    # size_splits too big
    with self.assertRaises(ValueError):
      array_ops.split([0, 1], [3, -1], axis=0)

    # Correct inference of variable dimension
    s0, s1 = array_ops.split([0, 1, 2], [2, -1], axis=0)
    assert s0.shape.as_list() == [2]
    assert s1.shape.as_list() == [1]
  def testInvalidNumOutputs(self):
    with self.assertRaisesRegexp(
        Exception,
        "Value for attr 'num_split' of -1 must be at least minimum 1"):
      array_ops.split(value=[1, 2, 3], num_or_size_splits=-1)

    with self.assertRaisesRegexp(
        Exception,
        "Value for attr 'num_split' of 0 must be at least minimum 1"):
      array_ops.split(value=[1, 2, 3], num_or_size_splits=0)
  def _testSpecialCasesVariable(self):
    inp = np.random.rand(4, 4).astype("f")

    with test_util.device(use_gpu=True):
      result = self.evaluate(array_ops.split(inp, [4], 0))
      self.assertAllEqual(result[0], inp)

      result = self.evaluate(array_ops.split(inp, [-1, 3], 0))
      self.assertAllEqual(result[0], inp[0:1, :])
      self.assertAllEqual(result[1], inp[1:4, :])
Example #14
  def _testSpecialCasesVariable(self, use_gpu):
    inp = np.random.rand(4, 4).astype("f")

    with self.test_session(use_gpu=use_gpu) as sess:
      result = sess.run(array_ops.split(inp, [4], 0))
      self.assertAllEqual(result[0], inp)

      result = sess.run(array_ops.split(inp, [-1, 3], 0))
      self.assertAllEqual(result[0], inp[0:1, :])
      self.assertAllEqual(result[1], inp[1:4, :])
Example #15
  def _untransform_gru_canonical(self, transformed_weights, transformed_biases):
    """The reverse procedure of _fuse_gru_canonical().

    Args:
      transformed_weights: a list of tensors, 3 for each layer. The 1st for
        reset and update gates; the 2nd and 3rd for the new memory gate.
      transformed_biases: 5 tensors each layer. The first for reset_and_update
        gate; the next two in line for candidate gate. The last 2 are original
        tensors for reset_and_update gates, retained since cuDNN biases are not
        restorable from the fused version.

    Returns:
      Two lists of tensors for weights and biases respectively.
      There are 6 tensors per weight and per bias for each layer:
      tensor 0-2 are applied to the input from the previous layer and
      tensor 3-5 to the recurrent input. Tensor 0 and 3 are for the reset gate;
      tensor 1 and 4 the update gate; tensor 2 and 5 the new memory gate.
    """
    weights, biases = [], []
    assert 5 * len(transformed_weights) == len(transformed_biases) * 3
    for i in range(len(transformed_weights) // 3):
      base_idx = 3 * i
      num_units = self._cudnn_rnn.num_units
      input_size = self._cudnn_rnn.input_size if i == 0 else num_units
      # reset and update gate weights applied on layer inputs.
      w_i = array_ops.slice(transformed_weights[base_idx], [0, 0],
                            [input_size, 2 * num_units])
      # reset and update gate weights applied on recurrent inputs.
      w_r = array_ops.slice(transformed_weights[base_idx], [input_size, 0],
                            [num_units, 2 * num_units])
      wi_list = array_ops.split(w_i, 2, axis=1)
      wr_list = array_ops.split(w_r, 2, axis=1)

      wi_list = [_flatten_transpose(w) for w in wi_list]
      wr_list = [_flatten_transpose(w) for w in wr_list]

      # candidate gate weights
      ih, hh = [
          _flatten_transpose(w)
          for w in transformed_weights[base_idx + 1:base_idx + 3]
      ]
      weights.extend(wi_list)
      weights.append(ih)
      weights.extend(wr_list)
      weights.append(hh)

      base_idx = 5 * i
      # Recover biases for reset and update gates.
      bi_list = array_ops.split(transformed_biases[base_idx + 3], 2, axis=0)
      br_list = array_ops.split(transformed_biases[base_idx + 4], 2, axis=0)
      biases.extend(bi_list)
      biases.append(transformed_biases[base_idx + 1])
      biases.extend(br_list)
      biases.append(transformed_biases[base_idx + 2])
    return weights, biases
  def testExplicitNum(self):
    size_splits = array_ops.constant([2, 2, 6], dtype=dtypes.int32)
    value = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    # Eager and Graph modes raise different exceptions
    with self.assertRaises((errors_impl.InvalidArgumentError, ValueError)):
      array_ops.split(value, size_splits, num=4)

    r = self.evaluate(array_ops.split(value, size_splits, num=3))
    self.assertAllEqual(r[0], value[0:2])
    self.assertAllEqual(r[1], value[2:4])
    self.assertAllEqual(r[2], value[4:])
Example #17
  def __call__(self, inputs, state, scope=None):
    """Recurrent Highway Network cell (RHN)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicRHNCell"
      # Parameters of gates are concatenated into one multiply for efficiency.
      if self._state_is_tuple:
        y = state
      else:
        y = array_ops.split(1, 1, state)
      assert self._recurrence_depth > 0 and type(self._recurrence_depth) is int
      # h_transform = [None] * self._recurrence_depth
      # t = [None] * self._recurrence_depth
      # s = [None] * self._recurrence_depth
      # concat = [None] * self._recurrence_depth
      # for i in range(self._recurrence_depth):
      #   if i == 0:
      #     concat[i] = _linear([inputs, h], 2 * self._num_units, True)
      #     # h = nonlinear transform, t = transfer gate
      #     h_transform[i], t[i] = array_ops.split(1, 2, concat[i])
      #     t[i] = sigmoid(t[i] + self._transfer_bias)
      #     s[i] = self._activation(h_transform[i]) * t[i] + \
      #         (1.0 - t[i]) * _linear([inputs], 1 * self._num_units, False)
      #   if i > 0:
      #     concat[i] = _linear([h], 2 * self._num_units, True)
      #     # h = nonlinear transform, t = transfer gate
      #     h_transform[i], t[i] = array_ops.split(1, 2, concat[i])
      #     t[i] = sigmoid(t[i] + self._transfer_bias)
      #     s[i] = self._activation(h_transform[i]) * t[i] + \
      #         (1.0 - t[i]) * s[i-1]

      # ALTERNATIVE IMPLEMENTATION:
      for i in range(self._recurrence_depth):
        if i == 0:
          concat = _linear([inputs, y], 2 * self._num_units, True)
          # h = nonlinear transform, t = transfer gate
          h, t = array_ops.split(1, 2, concat)
          t = sigmoid(t + self._transfer_bias)
          s = self._activation(h) * t + \
              (1.0 - t) * _linear([inputs], 1 * self._num_units, False)
        if i > 0:
          concat = _linear([s], 2 * self._num_units, True)
          # h = nonlinear transform, t = transfer gate
          h, t = array_ops.split(1, 2, concat)
          t = sigmoid(t + self._transfer_bias)
          s = self._activation(h) * t + \
              (1.0 - t) * s
      new_y = s

      if self._state_is_tuple:
        new_state = RHNStateTuple(new_y)
      else:
        new_state = array_ops.concat(1, new_y)
      return new_y, new_state
Example #18
  def testNonexistentDimTensor(self):
    x = array_ops.placeholder(dtypes.int32)
    values = np.zeros([5, 30])
    splits = array_ops.placeholder(dtypes.int32)
    with self.assertRaisesRegexp(ValueError, "Cannot infer"):
      y = array_ops.split(values, splits, axis=x)

    splits = array_ops.placeholder(dtypes.int32, [3])
    y = array_ops.split(values, splits, axis=x)
    with self.test_session(use_gpu=True) as sess:
      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                   "must have exactly one element"):
        sess.run(y, {x: np.array([], dtype=np.int32), splits: [4, 11, 15]})
Example #19
def _padded_split(tensor, pieces):
  """Like split for 1D tensors but pads-out case where len % pieces != 0.

  Args:
    tensor: `tf.Tensor` that must be 1D.
    pieces: a positive integer specifying the number of pieces into which
      tensor should be split.

  Returns:
    list of `tf.Tensor` of length pieces, which hold the values of
      the input tensor, in order.  The final tensor may
      be zero-padded on the end to make its size equal to those of all
      of the other tensors.

  Raises:
    ValueError: The input tensor is not 1D.
  """
  shape = tensor.shape
  if 1 != len(shape):
    raise ValueError("input tensor must be 1D")
  tensor_len = shape.dims[0].value
  with ops.colocate_with(tensor):
    if tensor_len % pieces != 0:
      # pad to an even length
      chunk_size = 1 + tensor_len // pieces
      if pieces > tensor_len:
        # This is an edge case that should not come up in practice,
        # i.e. a different reduction algorithm would be better,
        # but we'll make it work just for completeness.
        pad_len = pieces - tensor_len
        extended_whole = array_ops.concat(
            [tensor, array_ops.zeros([pad_len], dtype=tensor.dtype)], 0)
        parts = array_ops.split(extended_whole, pieces)
        return parts, pad_len
      elif (pieces - 1) * chunk_size >= tensor_len:
        # Another edge case of limited real interest.
        pad_len = (pieces * chunk_size) % tensor_len
        extended_whole = array_ops.concat(
            [tensor, array_ops.zeros([pad_len], dtype=tensor.dtype)], 0)
        parts = array_ops.split(extended_whole, pieces)
        return parts, pad_len
      else:
        last_chunk_size = tensor_len - (pieces - 1) * chunk_size
        pad_len = chunk_size - last_chunk_size
        piece_lens = [chunk_size for _ in range(pieces - 1)] + [last_chunk_size]
        parts = array_ops.split(tensor, piece_lens)
        parts[-1] = array_ops.concat(
            [parts[-1], array_ops.zeros([pad_len], dtype=tensor.dtype)], 0)
        return parts, pad_len
    else:
      return array_ops.split(tensor, pieces), 0
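A worked example of the padding arithmetic in the common branch (no TF needed): for a length-10 tensor and 3 pieces, chunk_size = 1 + 10 // 3 = 4, the raw piece lengths are [4, 4, 2], and the last piece is zero-padded by pad_len = 2 so every piece has length 4.

import numpy as np

t = np.arange(10.0)
chunk = 1 + len(t) // 3                   # 4
parts = np.split(t, [chunk, 2 * chunk])   # lengths [4, 4, 2]
pad_len = chunk - len(parts[-1])          # 2
parts[-1] = np.concatenate([parts[-1], np.zeros(pad_len)])
assert [len(p) for p in parts] == [4, 4, 4]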
Example #20
 def _tensor_to_sparse_feature_column(dense_tensor):
     """Returns SparseFeatureColumn for the input dense_tensor."""
     ignore_value = 0.0
     sparse_indices = array_ops.where(
         math_ops.not_equal(dense_tensor, math_ops.cast(ignore_value, dense_tensor.dtype))
     )
     sparse_values = array_ops.gather_nd(dense_tensor, sparse_indices)
     # TODO(sibyl-Aix6ihai, sibyl-vie3Poto): Make this efficient, as SDCA now
     # supports very sparse features with and without weights.
     return sdca_ops.SparseFeatureColumn(
         array_ops.reshape(array_ops.split(1, 2, sparse_indices)[0], [-1]),
         array_ops.reshape(array_ops.split(1, 2, sparse_indices)[1], [-1]),
         array_ops.reshape(math_ops.to_float(sparse_values), [-1]),
     )
  def testShapeFunctionEdgeCases(self):
    # split_dim greater than rank of input.
    with self.assertRaises(ValueError):
      array_ops.split(value=[[0, 1], [2, 3]], num_or_size_splits=4, axis=2)

    # split dim less than -(rank of input)
    with self.assertRaises(ValueError):
      array_ops.split(value=[[0, 1], [2, 3]], num_or_size_splits=4, axis=-3)

    # num_split does not evenly divide the size in split_dim.
    with self.assertRaisesRegexp(ValueError, "should evenly divide"):
      array_ops.split(value=[0, 1, 2, 3], num_or_size_splits=3, axis=0)

    # Unknown split_dim.
    splits = array_ops.split(
        value=[[0, 1, 2, 3]],
        num_or_size_splits=4,
        axis=array_ops.placeholder(dtypes.int32))
    for s in splits:
      self.assertEqual([None, None], s.get_shape().as_list())

    # Unknown split_dim and input shape.
    splits = array_ops.split(
        value=array_ops.placeholder(dtypes.float32),
        num_or_size_splits=4,
        axis=array_ops.placeholder(dtypes.int32))
    for s in splits:
      self.assertEqual(None, s.get_shape().ndims)
  def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      # Parameters of gates are concatenated into one multiply for efficiency.
      c, h = array_ops.split(1, 2, state)
      concat = linear([inputs, h], 4 * self._num_units, True)

      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      i, j, f, o = array_ops.split(1, 4, concat)

      new_c = c * sigmoid(f + self._forget_bias) + sigmoid(i) * tanh(j)
      new_h = tanh(new_c) * sigmoid(o)

      return new_h, array_ops.concat(1, [new_c, new_h])
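The cell above is the textbook LSTM update. As a sanity check, the same single-step arithmetic in plain NumPy (hypothetical sizes and random weights; the concat-matmul-bias below stands in for the `linear` helper):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

batch, input_size, num_units = 2, 3, 4
x = np.random.randn(batch, input_size)
c = np.zeros((batch, num_units))
h = np.zeros((batch, num_units))
W = np.random.randn(input_size + num_units, 4 * num_units)
b = np.zeros(4 * num_units)
forget_bias = 1.0

concat = np.concatenate([x, h], axis=1) @ W + b
i, j, f, o = np.split(concat, 4, axis=1)
new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
new_h = np.tanh(new_c) * sigmoid(o)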
Example #23
    def __call__(self, inputs, state, scope):
        # Parameters of gates are concatenated into one multiply for efficiency.
        c, h = array_ops.split(1, 2, state)
        self.W, self.b, concat = _linear([inputs, h], 4 * self._num_units,
                                        self.bias, self.W, self.b, self.W_init,
                                        trainable=self.trainable, scope=scope)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(1, 4, concat)

        new_c = c * self.activation(f + self._forget_bias) + self.activation(
            i) * self.inner_activation(j)
        new_h = self.inner_activation(new_c) * self.activation(o)
        return new_h, array_ops.concat(1, [new_c, new_h])
Example #24
  def _untransform_lstm_canonical(self, transformed_weights,
                                  transformed_biases):
    """The reverse procedure of _transform_lstm_canonical().

    Args:
      transformed_weights: a list of tensors, one for each layer.
      transformed_biases: a list of tensors, 3 for each layer: the 2nd for
        layer input, the 3rd for recurrent input, the 1st is the sum of the
        latter two.
    Returns:
      Two lists of tensors for weights and biases respectively.
      There are 8 tensors per weight and per bias for each layer:
      tensor 0-3 are applied to the input from the previous layer;
      tensor 4-7 to the recurrent input. Tensor 0 and 4 are for the input gate;
      tensor 1 and 5 the forget gate; tensor 2 and 6 the new memory gate;
      tensor 3 and 7 the output gate.
    """
    weights, biases = [], []
    assert 3 * len(transformed_weights) == len(transformed_biases)
    for i in range(len(transformed_weights)):
      num_units = self._cudnn_rnn.num_units
      input_size = self._cudnn_rnn.input_size if i == 0 else num_units
      # weights applied on layer inputs.
      wi = array_ops.slice(transformed_weights[i], [0, 0],
                           [input_size, 4 * num_units])
      # weights applied on recurrent inputs.
      wr = array_ops.slice(transformed_weights[i], [input_size, 0],
                           [num_units, 4 * num_units])
      wi_list = array_ops.split(wi, 4, axis=1)
      wr_list = array_ops.split(wr, 4, axis=1)

      for j in range(len(wi_list)):
        wi_list[j] = array_ops.reshape(array_ops.transpose(wi_list[j]), [-1])
        wr_list[j] = array_ops.reshape(array_ops.transpose(wr_list[j]), [-1])
      # canonical weights are in icfo order, convert to ifco order for cuDNN.
      self._switch_inner(wi_list, 0)
      self._switch_inner(wr_list, 0)
      weights.extend(wi_list)
      weights.extend(wr_list)

      base_idx = 3 * i
      bi_list = array_ops.split(transformed_biases[base_idx + 1], 4, axis=0)
      br_list = array_ops.split(transformed_biases[base_idx + 2], 4, axis=0)
      # canonical weights are in icfo order, convert to ifco order for cuDNN.
      self._switch_inner(bi_list, 0)
      self._switch_inner(br_list, 0)
      biases.extend(bi_list)
      biases.extend(br_list)
    return weights, biases
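For reference, the icfo -> ifco reorder mentioned in the comments amounts to swapping the second and third entries of each 4-tensor group; a one-line sketch assuming that is what `_switch_inner` does (the helper itself is defined elsewhere in the module):

gates = ['i', 'c', 'f', 'o']             # canonical icfo order
gates[1], gates[2] = gates[2], gates[1]  # cuDNN ifco order (assumed swap)
assert gates == ['i', 'f', 'c', 'o']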
def tfsplits(_):
  """A more complex graph, including splits."""
  x = array_ops.placeholder(dtypes.float32, shape=[2, 2], name='x')
  y = array_ops.placeholder(dtypes.float32, shape=[2, 2], name='y')
  for _ in range(3):
    x0, x1 = array_ops.split(x, 2, 0)
    y0, y1 = array_ops.split(y, 2, 0)
    x0 += 1
    y0 += 1
    z = math_ops.matmul(x, y, name='x_y_prod')
    a = array_ops.concat([x0, y1], axis=0, name='concat_x0_y1')
    b = array_ops.concat([y0, x1], axis=0, name='concat_y0_x1')
    x = math_ops.matmul(a, b, name='a_b')
    y = math_ops.add(x, z)
  array_ops.identity(y, name='result')
  def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      # Parameters of gates are concatenated into one multiply for efficiency.
      if self._state_is_tuple:
        c, h = state
      else:
        c, h = array_ops.split(1, 2, state)

      s1 = vs.get_variable("s1", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
      s2 = vs.get_variable("s2", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
      s3 = vs.get_variable("s3", initializer=tf.ones([self._num_units]), dtype=tf.float32)

      b1 = vs.get_variable("b1", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
      b2 = vs.get_variable("b2", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
      b3 = vs.get_variable("b3", initializer=tf.zeros([self._num_units]), dtype=tf.float32)

      # s1 = tf.Variable(tf.ones([4 * self._num_units]), name="s1")
      # s2 = tf.Variable(tf.ones([4 * self._num_units]), name="s2")
      # s3 = tf.Variable(tf.ones([self._num_units]), name="s3")
      #
      # b1 = tf.Variable(tf.zeros([4 * self._num_units]), name="b1")
      # b2 = tf.Variable(tf.zeros([4 * self._num_units]), name="b2")
      # b3 = tf.Variable(tf.zeros([self._num_units]), name="b3")

      input_below_ = rnn_cell._linear([inputs],
                                      4 * self._num_units, False, scope="out_1")
      input_below_ = ln(input_below_, s1, b1)
      state_below_ = rnn_cell._linear([h],
                                      4 * self._num_units, False, scope="out_2")
      state_below_ = ln(state_below_, s2, b2)
      lstm_matrix = tf.add(input_below_, state_below_)

      i, j, f, o = array_ops.split(1, 4, lstm_matrix)

      new_c = (c * sigmoid(f) + sigmoid(i) *
               self._activation(j))

      # Currently normalizing c causes a lot of NaNs in the model, so it is
      # commented out for now.
      # new_c_ = ln(new_c, s3, b3)
      new_c_ = new_c
      new_h = self._activation(new_c_) * sigmoid(o)

      if self._state_is_tuple:
        new_state = LSTMStateTuple(new_c, new_h)
      else:
        new_state = array_ops.concat(1, [new_c, new_h])
      return new_h, new_state
Example #27
    def __call__(self, inputs, state, scope=None):
        """Gated recurrent unit (GRU) with nunits cells."""
        with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
            with tf.variable_scope("Gates"):  # Reset gate and update gate.
                # We start with bias of 1.0 to not reset and not update.
                r, u = array_ops.split(1, 2, _linear([inputs, state],
                    2 * self._num_units, True, 1.0, self.weights_init,
                    self.trainable, self.restore, self.reuse))
                r, u = self._inner_activation(r), self._inner_activation(u)
            with tf.variable_scope("Candidate"):
                c = self._activation(
                    _linear([inputs, r * state], self._num_units, True, 0.,
                            self.weights_init, self.trainable, self.restore,
                            self.reuse))
            new_h = u * state + (1 - u) * c

            self.W, self.b = list(), list()
            # Retrieve RNN Variables
            with tf.variable_scope('Gates/Linear', reuse=True):
                self.W.append(tf.get_variable('Matrix'))
                self.b.append(tf.get_variable('Bias'))
            with tf.variable_scope('Candidate/Linear', reuse=True):
                self.W.append(tf.get_variable('Matrix'))
                self.b.append(tf.get_variable('Bias'))

        return new_h, new_h
 def split(self, value, lengths, name=None):
   """See TensorArray."""
   # error checking to match graph-mode errors
   value = constant_op.constant(value)
   lengths = constant_op.constant(lengths)
   sum_lengths = math_ops.reduce_sum(lengths)
   if lengths.shape.ndims != 1:
     raise errors_impl.InvalidArgumentError(
         None, None, "Expected lengths to be a vector, received shape: %s" %
         lengths.shape.as_list())
   elif value.shape.ndims == 0:
     raise errors_impl.InvalidArgumentError(
         None, None, "Expected value to be at least a vector, "
         "but received shape: %s" % value.shape.as_list())
   elif sum_lengths.numpy() != value.shape.as_list()[0]:
     raise errors_impl.InvalidArgumentError(
         None, None, "Expected sum of lengths to be equal to "
         "values.shape[0], but sum of lengths is %d and "
         "value's shape is: %s " % (sum_lengths.numpy(),
                                    value.shape.as_list()))
   elif not self._dynamic_size and lengths.shape[0] != len(self._tensor_array):
     raise errors_impl.InvalidArgumentError(
         None, None, "TensorArray's size is not equal to the size of "
         "lengths (%d vs. %d), and the TensorArray is not marked as "
         "dynamically resizeable" % (len(self._tensor_array),
                                     lengths.shape[0]))
   else:
     ta = self._identity_without_array()
     tensor_array = array_ops.split(value, lengths, name=name)
     ta._implementation._tensor_array = tensor_array  # pylint: disable=protected-access
     return ta
Example #29
 def call(self, inputs, state):
   """Gated recurrent unit (GRU) with nunits cells."""
   with vs.variable_scope("gates"):  # Reset gate and update gate.
     # We start with bias of 1.0 to not reset and not update.
     bias_ones = self._bias_initializer
     if self._bias_initializer is None:
       dtype = inputs.dtype
       bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
     # pylint: disable=protected-access
     value = math_ops.sigmoid(
         rnn_cell_impl._linear([inputs, state], 2 * self._num_units, True,
                               bias_ones, self._kernel_initializer))
     r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
     # pylint: enable=protected-access
   with vs.variable_scope("candidate"):
     # pylint: disable=protected-access
     with vs.variable_scope("input_projection"):
       hi = rnn_cell_impl._linear(inputs, self._num_units, True,
                                  self._bias_initializer,
                                  self._kernel_initializer)
     with vs.variable_scope("hidden_projection"):
       hh = r * (rnn_cell_impl._linear(state, self._num_units, True,
                                       self._bias_initializer,
                                       self._kernel_initializer))
     # pylint: enable=protected-access
     c = self._activation(hi + hh)
   new_h = u * state + (1 - u) * c
   return new_h, new_h
Example #30
  def testReuse(self):

    def f(x):
      return core_layers.dense(x, self.CHANNELS // 2)

    def g(x):
      return core_layers.dense(x, self.CHANNELS // 2)

    x = random_ops.random_uniform(
        [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32)
    x1, x2 = array_ops.split(x, 2, axis=-1)

    with variable_scope.variable_scope("test"):
      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)

    num_vars_before = len(variables.global_variables())

    with variable_scope.variable_scope("test", reuse=True):
      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)

    num_vars_after = len(variables.global_variables())
    self.assertEqual(num_vars_before, num_vars_after)

    loss = math_ops.reduce_mean(y1 + y2)
    _ = gradients_impl.gradients(loss,
                                 [x] + variables.trainable_variables())

    with variable_scope.variable_scope("test", reuse=True):
      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)

    num_vars_after = len(variables.global_variables())
    self.assertEqual(num_vars_before, num_vars_after)
Example #31
    def __call__(self, inputs, state, scope=None):
        """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: state Tensor, 2D, batch x state_size.
      scope: VariableScope for the created subgraph; defaults to "LSTMCell".

    Returns:
      A tuple containing:
      - A 2D, batch x output_dim, Tensor representing the output of the LSTM
        after reading "inputs" when previous state was "state".
        Here output_dim is:
           num_proj if num_proj was set,
           num_units otherwise.
      - A 2D, batch x state_size, Tensor representing the new state of LSTM
        after reading "inputs" when previous state was "state".
    Raises:
      ValueError: if an input_size was specified and the provided inputs have
        a different dimension.
    """
        num_proj = self._num_units if self._num_proj is None else self._num_proj

        c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
        m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

        dtype = inputs.dtype
        actual_input_size = inputs.get_shape().as_list()[1]
        if self._input_size and self._input_size != actual_input_size:
            raise ValueError(
                "Actual input size not same as specified: %d vs %d." %
                (actual_input_size, self._input_size))
        with vs.variable_scope(scope or type(self).__name__,
                               initializer=self._initializer):  # "LSTMCell"
            concat_w = _get_concat_variable(
                "W", [actual_input_size + num_proj, 4 * self._num_units],
                dtype, self._num_unit_shards)

            b = vs.get_variable("B",
                                shape=[4 * self._num_units],
                                initializer=array_ops.zeros_initializer,
                                dtype=dtype)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            cell_inputs = array_ops.concat(1, [inputs, m_prev])
            lstm_matrix = nn_ops.bias_add(
                math_ops.matmul(cell_inputs, concat_w), b)
            i, j, f, o = array_ops.split(1, 4, lstm_matrix)

            # Diagonal connections
            if self._use_peepholes:
                w_f_diag = vs.get_variable("W_F_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_i_diag = vs.get_variable("W_I_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_o_diag = vs.get_variable("W_O_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)

            if self._use_peepholes:
                c = (sigmoid(f + 1 + w_f_diag * c_prev) * c_prev +
                     sigmoid(i + w_i_diag * c_prev) * tanh(j))
            else:
                c = (sigmoid(f + 1) * c_prev + sigmoid(i) * tanh(j))

            if self._cell_clip is not None:
                c = clip_ops.clip_by_value(c, -self._cell_clip,
                                           self._cell_clip)

            if self._use_peepholes:
                m = sigmoid(o + w_o_diag * c) * tanh(c)
            else:
                m = sigmoid(o) * tanh(c)

            if self._num_proj is not None:
                concat_w_proj = _get_concat_variable(
                    "W_P", [self._num_units, self._num_proj], dtype,
                    self._num_proj_shards)

                m = math_ops.matmul(m, concat_w_proj)

        return m, array_ops.concat(1, [c, m])
Example #32
    def call(self, inputs, states, training=None):
        """
        inputs: shape is [batch, window_size, number_of_sensor, 1]
        """

        h_state = states[0]  # previous memory state

        c_state = states[1]  # previous carry state
        #c_shape = c_tm1.get_shape().as_list()                                             # [BATCH, conv_rest, 1, LAST_FILTER]
        #
        # dropout matrices for input units
        dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
        # dropout matrices for recurrent units
        rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(h_state,
                                                               training,
                                                               count=4)

        if 0 < self.dropout < 1.:
            x_i = inputs * dp_mask[0]
            x_f = inputs * dp_mask[1]
            x_c = inputs * dp_mask[2]
            x_o = inputs * dp_mask[3]
        else:
            x_i = inputs
            x_f = inputs
            x_c = inputs
            x_o = inputs

        if 0 < self.recurrent_dropout < 1.:
            h_i = h_state * rec_dp_mask[0]
            h_f = h_state * rec_dp_mask[1]
            h_c = h_state * rec_dp_mask[2]
            h_o = h_state * rec_dp_mask[3]
        else:
            h_i = h_state
            h_f = h_state
            h_c = h_state
            h_o = h_state

        for index in range(self.number_of_layer):
            # weights for inputs in FOUR GATES
            (kernel_i, kernel_f, kernel_c,
             kernel_o) = array_ops.split(self.kernel[index], 4, axis=3)
            # weights for hidden states in FOUR GATES
            if index == self.number_of_layer - 1:
                (recurrent_kernel_i, recurrent_kernel_f, recurrent_kernel_c,
                 recurrent_kernel_o, recurrent_kernel_c_1) = array_ops.split(
                     self.recurrent_kernel[index], 5, axis=3)
            else:
                (recurrent_kernel_i, recurrent_kernel_f, recurrent_kernel_c,
                 recurrent_kernel_o) = array_ops.split(
                     self.recurrent_kernel[index], 4, axis=3)
            #######################################################################################
            # weights for BIAS in FOUR GATES
            if self.use_bias:
                bias_i, bias_f, bias_c, bias_o = array_ops.split(
                    self.bias[index], 4)
            else:
                bias_i, bias_f, bias_c, bias_o = None, None, None, None
            x_i = self.input_conv(x_i, kernel_i, bias_i, padding=self.padding)
            x_f = self.input_conv(x_f, kernel_f, bias_f, padding=self.padding)
            x_c = self.input_conv(x_c, kernel_c, bias_c, padding=self.padding)
            x_o = self.input_conv(x_o, kernel_o, bias_o, padding=self.padding)

            h_i = self.recurrent_conv(h_i, recurrent_kernel_i)
            h_f = self.recurrent_conv(h_f, recurrent_kernel_f)
            h_c = self.recurrent_conv(h_c, recurrent_kernel_c)
            h_o = self.recurrent_conv(h_o, recurrent_kernel_o)

            if index == self.number_of_layer - 1:
                #######################################################################################
                c_c = self.recurrent_conv(c_state, recurrent_kernel_c_1)

                i = self.recurrent_activation[index](x_i + h_i)
                f = self.recurrent_activation[index](x_f + h_f)
                o = self.recurrent_activation[index](x_o + h_o)

                c = f * c_c + i * self.activation(x_c + h_c)
                h = o * self.activation(c)
            else:

                x_i = self.conv_activation[index](x_i)
                x_f = self.conv_activation[index](x_f)
                x_c = self.conv_activation[index](x_c)
                x_o = self.conv_activation[index](x_o)

                h_i = self.recurrent_activation[index](h_i)
                h_f = self.recurrent_activation[index](h_f)
                h_c = self.recurrent_activation[index](h_c)
                h_o = self.recurrent_activation[index](h_o)

            if index == 1:
                self.data_format = "channels_last"

        self.data_format = None
        return h, [h, c]
def frechet_classifier_distance(real_images,
                                generated_images,
                                classifier_fn,
                                num_batches=1):
  """Classifier distance for evaluating a generative model.

  This is based on the Frechet Inception distance, but for an arbitrary
  classifier.

  This technique is described in detail in https://arxiv.org/abs/1706.08500.
  Given two Gaussian distributions with means m and m_w and covariance matrices
  C and C_w, this function calculates

  |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))

  which captures how different the distributions of real images and generated
  images (or more accurately, their visual features) are. Note that unlike the
  Inception score, this is a true distance and utilizes information about real
  world images.

  Note that when computed using sample means and sample covariance matrices,
  Frechet distance is biased. It is more biased for small sample sizes. (e.g.
  even if the two distributions are the same, for a small sample size, the
  expected Frechet distance is large). It is important to use the same
  sample size to compute frechet classifier distance when comparing two
  generative models.

  Args:
    real_images: Real images to use to compute Frechet Inception distance.
    generated_images: Generated images to use to compute Frechet Inception
      distance.
    classifier_fn: A function that takes images and produces activations
      based on a classifier.
    num_batches: Number of batches to split the images into, in order to
      efficiently run them through the classifier network.

  Returns:
    The Frechet Inception distance. A floating-point scalar.
  """

  real_images_list = array_ops.split(
      real_images, num_or_size_splits=num_batches)
  generated_images_list = array_ops.split(
      generated_images, num_or_size_splits=num_batches)

  imgs = array_ops.stack(real_images_list + generated_images_list)

  # Compute the activations using the memory-efficient `map_fn`.
  activations = functional_ops.map_fn(
      fn=classifier_fn,
      elems=imgs,
      parallel_iterations=1,
      back_prop=False,
      swap_memory=True,
      name='RunClassifier')

  # Split the activations by the real and generated images.
  real_a, gen_a = array_ops.split(activations, [num_batches, num_batches], 0)

  # Ensure the activations have the right shapes.
  real_a = array_ops.concat(array_ops.unstack(real_a), 0)
  gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)
  real_a.shape.assert_has_rank(2)
  gen_a.shape.assert_has_rank(2)

  # Compute mean and covariance matrices of activations.
  m = math_ops.reduce_mean(real_a, 0)
  m_v = math_ops.reduce_mean(gen_a, 0)
  num_examples = math_ops.to_float(array_ops.shape(real_a)[0])

  # sigma = (1 / (n - 1)) * (X - mu) (X - mu)^T
  sigma = math_ops.matmul(
      real_a - m, real_a - m, transpose_a=True) / (num_examples - 1)

  sigma_v = math_ops.matmul(
      gen_a - m_v, gen_a - m_v, transpose_a=True) / (num_examples - 1)

  # Find the Tr(sqrt(sigma sigma_v)) component of FID
  sqrt_trace_component = trace_sqrt_product(sigma, sigma_v)

  # Compute the two components of FID.

  # First the covariance component.
  # Here, note that trace(A + B) = trace(A) + trace(B)
  trace = math_ops.trace(sigma + sigma_v) - 2.0 * sqrt_trace_component

  # Next the distance between means.
  mean = math_ops.square(linalg_ops.norm(m - m_v))  # This uses the L2 norm.
  fid = trace + mean

  return fid
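On precomputed activations, the distance |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2)) can be checked with a small NumPy/SciPy sketch (hypothetical inputs; scipy.linalg.sqrtm plays the role of trace_sqrt_product):

import numpy as np
from scipy import linalg

def fid(real_a, gen_a):
    m, m_w = real_a.mean(axis=0), gen_a.mean(axis=0)
    c = np.cov(real_a, rowvar=False)      # (n - 1) denominator, as above
    c_w = np.cov(gen_a, rowvar=False)
    covmean = linalg.sqrtm(c @ c_w).real  # drop tiny imaginary residue
    return np.sum((m - m_w) ** 2) + np.trace(c + c_w - 2.0 * covmean)

real_a = np.random.randn(64, 16)
gen_a = np.random.randn(64, 16)
print(fid(real_a, gen_a))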
Example #34
 def _bag_features(self, tree_num, input_data):
   split_data = array_ops.split(
       value=input_data, num_or_size_splits=self.params.num_features, axis=1)
   return array_ops.concat(
       [split_data[ind] for ind in self.params.bagged_features[tree_num]], 1)
Example #35
    def call(self, inputs, state):
        """Long short-term memory cell (LSTM).

    Args:
      inputs: `2-D` tensor with shape `[batch_size, input_size]`.
      state: An `LSTMStateTuple` of state tensors, each shaped
        `[batch_size, self.state_size]`, if `state_is_tuple` has been set to
        `True`.  Otherwise, a `Tensor` shaped
        `[batch_size, 2 * self.state_size]`.

    Returns:
      A pair containing the new hidden state, and the new state (either a
        `LSTMStateTuple` or a concatenated state, depending on
        `state_is_tuple`).
    """
        sigmoid = math_ops.sigmoid
        one = constant_op.constant(1, dtype=dtypes.int32)
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)

        gate_inputs = math_ops.matmul(array_ops.concat([inputs, h], 1),
                                      self._kernel)
        gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

        f_master_gate = _cumsoftmax(gate_inputs[:, :self._levels], 'l2r')
        f_master_gate = array_ops.expand_dims(f_master_gate, 2)
        i_master_gate = _cumsoftmax(
            gate_inputs[:, self._levels:self._levels * 2], 'r2l')
        i_master_gate = array_ops.expand_dims(i_master_gate, 2)

        gate_inputs = gen_array_ops.reshape(
            gate_inputs[:, self._levels * 2:],
            [-1, self._levels * 4, self._chunk_size])

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=gate_inputs,
                                     num_or_size_splits=4,
                                     axis=one)

        forget_bias_tensor = constant_op.constant(self._forget_bias,
                                                  dtype=f.dtype)
        # Note that using `add` and `multiply` instead of `+` and `*` gives a
        # performance improvement. So using those at the cost of readability.

        add = math_ops.add
        multiply = math_ops.multiply
        overlap = multiply(f_master_gate, i_master_gate)
        c = gen_array_ops.reshape(c, [-1, self._levels, self._chunk_size])
        new_c = add(multiply(c, sigmoid(add(f, forget_bias_tensor))),
                    multiply(sigmoid(i), self._activation(j)))

        new_c = add(
            add(multiply(overlap, new_c), multiply((f_master_gate - overlap),
                                                   c)),
            multiply((i_master_gate - overlap), self._activation(j)))

        new_h = multiply(self._activation(new_c), sigmoid(o))

        new_c = gen_array_ops.reshape(new_c, [-1, self._num_units])
        new_h = gen_array_ops.reshape(new_h, [-1, self._num_units])

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
Example #36
def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
    """Gradient for concat op.

  Args:
    op: An operation.
    grad: `Tensor` or `IndexedSlices` representing the gradients with respect
      to each output of the op.
    start_value_index: An integer index of the first value in the op.inputs.
    end_value_index: An integer index of the last value in the op.inputs.
    dim_index: An integer index of concat_dim or axis parameter in op.inputs.

  Returns:
    Tensors representing the partial gradients with respect to each input
    of the op.

  Raises:
    ValueError: if concat_dim/axis is not statically known.
  """
    def _CreateDenseMaskAndBegin(sizes, concat_dim):
        """Create variables for iteratively slicing a dense gradients tensor."""
        # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
        shape_of_shape = array_ops.shape(sizes[0])
        # Make a vector of length equal to the input's dimensions,
        # with 0's everywhere and 1 in the concat dim position.
        # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
        mask = array_ops.concat([
            array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1],
            array_ops.fill(shape_of_shape - concat_dim - 1, 0)
        ], 0)
        begin = array_ops.fill(shape_of_shape, 0)
        return mask, begin

    def _ExtractInputShapes(inputs):
        """Extract the shapes of a set of input tensors."""
        sizes = []
        fully_known = True
        for x in inputs:
            input_shape = array_ops.shape(x)
            if not isinstance(input_shape,
                              ops.Tensor) or input_shape.op.type != "Const":
                fully_known = False
                break
            else:
                sizes.append(input_shape)

        if fully_known:
            return sizes
        else:
            return array_ops.shape_n(inputs)

    # Degenerate concatenation, just return grad.
    if len(op.inputs) == 2:
        return grad + [None] if end_value_index <= dim_index else [None] + grad

    concat_dim = op.inputs[dim_index]
    input_values = op.inputs[start_value_index:end_value_index]
    # Using mod here for convenience since concat_dim is already verified
    # in concat implementation to be within the allowed [-rank, rank) range.
    non_neg_concat_dim = concat_dim % array_ops.rank(input_values[0])

    out_grads = []
    if isinstance(grad, ops.Tensor):
        # Get the inputs' tensor shapes
        sizes = _ExtractInputShapes(input_values)
        # The magic number of 16 was found through benchmarking a range of sizes
        # on CPUs and a Maxwell TitanX.  A speedup was seen in a large majority of
        # cases when switching implementations at N=16, but it is possible that
        # there will be a small number of performance regressions.
        # pylint: disable=protected-access
        if len(sizes) > 16:
            # extract the size of each input along the concat dimension
            sizes = array_ops.squeeze(
                array_ops.slice(array_ops.stack(sizes, axis=1),
                                [non_neg_concat_dim, 0], [1, -1]))
            out_grads = array_ops.split(grad, sizes, non_neg_concat_dim)
        else:
            offset = gen_array_ops._concat_offset(non_neg_concat_dim, sizes)
            for (begin, size) in zip(offset, sizes):
                out_grads.append(array_ops.slice(grad, begin, size))
        # pylint: enable=protected-access
    elif isinstance(grad, ops.IndexedSlices):
        concat_dim_static = tensor_util.constant_value(concat_dim)
        if concat_dim_static is None:
            raise ValueError("Can only compute IndexedSlices gradient with "
                             "statically-known concat_dim")
        if concat_dim_static < 0:
            rank = tensor_util.constant_value(array_ops.rank(input_values[0]))
            if rank is None:
                raise ValueError(
                    "Can only compute IndexedSlices gradient with "
                    "negative concat_dim when first value rank is "
                    "statically-known.")
            concat_dim_static %= rank
        # Get the inputs' tensor shapes
        sizes = [array_ops.shape(x) for x in input_values]
        if concat_dim_static > 0:
            # IndexedSlices, non_neg_concat_dim > 0. Each input gets IndexedSlices
            # gradients with all the indices, but with grad.values sliced accordingly.
            # This is like the Tensor case, except shape(grad.values)[0] is not equal
            # to shape(sizes[i])[0], since only a subset of the dim-0 values are
            # stored.
            mask, begin = _CreateDenseMaskAndBegin(sizes, non_neg_concat_dim)
            for size in sizes:
                new_values = array_ops.slice(
                    grad.values, begin,
                    array_ops.concat(
                        [[-1], array_ops.slice(size, [1], [-1])], 0))
                out_grads.append(
                    ops.IndexedSlices(new_values, grad.indices, size))
                # Lint complains begin = begin + ...
                begin = math_ops.add(begin, size * mask)
        else:
            # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
            # only for the relevant indices.
            start = constant_op.constant(0, dtype=grad.indices.dtype)
            for size in sizes:
                size_concat_dim = array_ops.gather(size, non_neg_concat_dim)
                if size_concat_dim.dtype != grad.indices.dtype:
                    size_concat_dim = math_ops.cast(size_concat_dim,
                                                    dtype=grad.indices.dtype)
                end = start + size_concat_dim
                # Compute the 1-D Tensor of indices relevant for this input.
                indices_to_select = array_ops.squeeze(array_ops.where(
                    math_ops.logical_and(grad.indices >= start,
                                         grad.indices < end)),
                                                      squeeze_dims=[1])
                new_indices = array_ops.gather(grad.indices,
                                               indices_to_select) - start
                new_values = array_ops.gather(grad.values, indices_to_select)
                out_grads.append(
                    ops.IndexedSlices(new_values, new_indices, size))
                start = end
    else:
        raise TypeError("Expected Tensor or IndexedSlices, got %s" %
                        type(grad))

    return (out_grads + [None] if end_value_index <= dim_index else [None] +
            out_grads)
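# A minimal eager-mode sketch (TF 2.x API, assumed here for brevity) of the
# property the gradient above implements: the gradient of tf.concat is just
# the upstream gradient split back along the concat axis. All names below are
# illustrative.
import tensorflow as tf

a = tf.Variable(tf.ones([2, 3]))
b = tf.Variable(tf.ones([2, 5]))
with tf.GradientTape() as tape:
    y = tf.concat([a, b], axis=1)             # shape [2, 8]
    loss = tf.reduce_sum(y * tf.range(8.0))   # distinct per-column weights
grad_a, grad_b = tape.gradient(loss, [a, b])
# grad_a holds columns 0..2 of the upstream gradient, grad_b columns 3..7.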
Example #37
    def __call__(self, inputs, state, scope=None):
        """Run one step of LSTM.

        Args:
          inputs: input Tensor, 2D, batch x num_units.
          state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, batch x state_size`.  If `state_is_tuple` is True, this must be a
            tuple of state Tensors, both `2-D`, with column sizes `c_state` and
            `m_state`.
          scope: VariableScope for the created subgraph; defaults to "LSTMCell".

        Returns:
          A tuple containing:
          - A `2-D, [batch x output_dim]`, Tensor representing the output of the
            LSTM after reading `inputs` when previous state was `state`.
            Here output_dim is:
               num_proj if num_proj was set,
               num_units otherwise.
          - Tensor(s) representing the new state of LSTM after reading `inputs` when
            the previous state was `state`.  Same type and shape(s) as `state`.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
        """
        num_proj = self._num_units if self._num_proj is None else self._num_proj

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")
        with vs.variable_scope(scope or type(self).__name__,
                               initializer=self._initializer):  # "LSTMCell"
            i_size = input_size.value - 1  # -1 to extract time
            times = array_ops.slice(inputs, [0, i_size], [-1, 1])
            filtered_inputs = array_ops.slice(inputs, [0, 0], [-1, i_size])

            # --------------------------------------- #
            # ------------- PHASED LSTM ------------- #
            # ---------------- BEGIN ---------------- #
            # --------------------------------------- #

            tau = vs.get_variable("T",
                                  shape=[self._num_units],
                                  initializer=random_exp_initializer(
                                      0, self.tau_init),
                                  dtype=dtype)

            r_on = vs.get_variable("R",
                                   shape=[self._num_units],
                                   initializer=init_ops.constant_initializer(
                                       self.r_on_init),
                                   dtype=dtype)

            s = vs.get_variable(
                "S",
                shape=[self._num_units],
                initializer=init_ops.random_uniform_initializer(
                    0., tau.initialized_value()),
                dtype=dtype)
            # for backward compatibility (v < 0.12.0) use the following line instead of the above
            # initializer = init_ops.random_uniform_initializer(0., tau), dtype = dtype)

            tau_broadcast = tf.expand_dims(tau, dim=0)
            r_on_broadcast = tf.expand_dims(r_on, dim=0)
            s_broadcast = tf.expand_dims(s, dim=0)

            r_on_broadcast = tf.abs(r_on_broadcast)
            tau_broadcast = tf.abs(tau_broadcast)
            times = tf.tile(times, [1, self._num_units])

            # calculate kronos gate
            phi = tf.div(
                tf.mod(
                    tf.mod(times - s_broadcast, tau_broadcast) + tau_broadcast,
                    tau_broadcast), tau_broadcast)
            is_up = tf.less(phi, (r_on_broadcast * 0.5))
            is_down = tf.logical_and(tf.less(phi, r_on_broadcast),
                                     tf.logical_not(is_up))

            k = tf.select(
                is_up, phi / (r_on_broadcast * 0.5),
                tf.select(is_down, 2. - 2. * (phi / r_on_broadcast),
                          self.alpha * phi))

            # --------------------------------------- #
            # ------------- PHASED LSTM ------------- #
            # ----------------- END ----------------- #
            # --------------------------------------- #

            concat_w = _get_concat_variable(
                "W", [i_size + num_proj, 4 * self._num_units], dtype,
                self._num_unit_shards)

            b = vs.get_variable("B",
                                shape=[4 * self._num_units],
                                initializer=init_ops.zeros_initializer,
                                dtype=dtype)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            cell_inputs = array_ops.concat(1, [filtered_inputs, m_prev])
            lstm_matrix = nn_ops.bias_add(
                math_ops.matmul(cell_inputs, concat_w), b)
            i, j, f, o = array_ops.split(1, 4, lstm_matrix)

            # Diagonal connections
            if self._use_peepholes:
                w_f_diag = vs.get_variable("W_F_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_i_diag = vs.get_variable("W_I_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_o_diag = vs.get_variable("W_O_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)

            if self._use_peepholes:
                c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) *
                     c_prev +
                     sigmoid(i + w_i_diag * c_prev) * self._activation(j))
            else:
                c = (sigmoid(f + self._forget_bias) * c_prev +
                     sigmoid(i) * self._activation(j))

            if self._cell_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                c = clip_ops.clip_by_value(c, -self._cell_clip,
                                           self._cell_clip)
                # pylint: enable=invalid-unary-operand-type

            if self._use_peepholes:
                m = sigmoid(o + w_o_diag * c) * self._activation(c)
            else:
                m = sigmoid(o) * self._activation(c)

            if self._num_proj is not None:
                concat_w_proj = _get_concat_variable(
                    "W_P", [self._num_units, self._num_proj], dtype,
                    self._num_proj_shards)

                m = math_ops.matmul(m, concat_w_proj)
                if self._proj_clip is not None:
                    # pylint: disable=invalid-unary-operand-type
                    m = clip_ops.clip_by_value(m, -self._proj_clip,
                                               self._proj_clip)
                    # pylint: enable=invalid-unary-operand-type

            # APPLY KRONOS GATE
            c = k * c + (1. - k) * c_prev
            m = k * m + (1. - k) * m_prev
            # END KRONOS GATE

        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat(1, [c, m]))
        return m, new_state
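# A small numpy sketch (illustrative values; alpha is the leak slope used in
# the cell above) of the kronos/time gate: openness rises over the first half
# of the open period r_on, falls over the second half, and leaks elsewhere.
import numpy as np

def kronos_gate(times, tau, r_on, s, alpha=0.001):
    # Cyclic phase in [0, 1); the double mod keeps it non-negative.
    phi = np.mod(np.mod(times - s, tau) + tau, tau) / tau
    return np.where(phi < 0.5 * r_on, phi / (0.5 * r_on),
                    np.where(phi < r_on, 2. - 2. * (phi / r_on), alpha * phi))

k = kronos_gate(times=np.linspace(0., 10., 5), tau=2.0, r_on=0.2, s=0.0)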
Example #38
    def call(self, inputs, states, training=None):
        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        # dropout matrices for input units
        dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
        # dropout matrices for recurrent units
        rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(h_tm1,
                                                               training,
                                                               count=4)

        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs

        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1

        (kernel_i, kernel_f, kernel_c, kernel_o) = array_ops.split(self.kernel,
                                                                   4,
                                                                   axis=3)
        (cov_kernel_i, cov_kernel_f, cov_kernel_c,
         cov_kernel_o) = array_ops.split(self.cov_kernel, 4, axis=3)
        (recurrent_kernel_i, recurrent_kernel_f, recurrent_kernel_c,
         recurrent_kernel_o) = array_ops.split(self.recurrent_kernel,
                                               4,
                                               axis=3)
        (recurrent_kernel_i_c, recurrent_kernel_f_c, recurrent_kernel_c_c,
         recurrent_kernel_o_c,
         recurrent_kernel_c_1) = array_ops.split(self.cov_recurrent_kernel,
                                                 5,
                                                 axis=3)

        if self.use_bias:
            bias_i, bias_f, bias_c, bias_o = array_ops.split(self.bias, 4)
        else:
            bias_i, bias_f, bias_c, bias_o = None, None, None, None

        x_i = self.input_conv(inputs_i, kernel_i, bias_i, padding=self.padding)

        x_f = self.input_conv(inputs_f, kernel_f, bias_f, padding=self.padding)

        x_c = self.input_conv(inputs_c, kernel_c, bias_c, padding=self.padding)

        x_o = self.input_conv(inputs_o, kernel_o, bias_o, padding=self.padding)

        x_i_c = self.input_conv_u(x_i, cov_kernel_i, padding=self.padding)

        x_f_c = self.input_conv_u(x_f, cov_kernel_f, padding=self.padding)

        x_c_c = self.input_conv_u(x_c, cov_kernel_c, padding=self.padding)

        x_o_c = self.input_conv_u(x_o, cov_kernel_o, padding=self.padding)

        h_i = self.recurrent_conv(h_tm1_i, recurrent_kernel_i)

        h_f = self.recurrent_conv(h_tm1_f, recurrent_kernel_f)

        h_c = self.recurrent_conv(h_tm1_c, recurrent_kernel_c)

        h_o = self.recurrent_conv(h_tm1_o, recurrent_kernel_o)

        h_i_c = self.recurrent_conv_u(h_i, recurrent_kernel_i_c)

        h_f_c = self.recurrent_conv_u(h_f, recurrent_kernel_f_c)

        h_c_c = self.recurrent_conv_u(h_c, recurrent_kernel_c_c)

        h_o_c = self.recurrent_conv_u(h_o, recurrent_kernel_o_c)

        c_c = self.recurrent_conv_u(c_tm1, recurrent_kernel_c_1)

        i = self.recurrent_activation(x_i_c + h_i_c)
        f = self.recurrent_activation(x_f_c + h_f_c)
        c = f * c_c + i * self.activation(x_c_c + h_c_c)
        o = self.recurrent_activation(x_o_c + h_o_c)
        h = o * self.activation(c)

        return h, [h, c]
Example #39
 def loop_fn(i):
     x1 = array_ops.gather(x, i)
     return (array_ops.split(x1, [2, 1, 3],
                             axis=0), array_ops.split(x1, [3], axis=-1))
    def pack(self, grouped_grads_and_vars):
        """Pack tensors."""
        self.grouped_grads_and_vars = grouped_grads_and_vars
        self.all_device_shapes = []
        self.all_device_sizes = []

        device_grad_packs = []
        for device_grads_and_vars in grouped_grads_and_vars:
            with ops.colocate_with(device_grads_and_vars[0][0]):
                # Flatten all the grads.
                flat_grads = [
                    array_ops.reshape(g, [-1])
                    for g, _ in device_grads_and_vars
                ]
                # Remember the original shape of all the grads.
                device_shapes = [
                    array_ops.shape(g) for g, _ in device_grads_and_vars
                ]
                # Remember the original sizes of all the grads.
                device_sizes = [
                    array_ops.size(g) for g, _ in device_grads_and_vars
                ]
                # Concat all the flat grads into a big flat tensor.
                concat_grads = array_ops.concat(flat_grads, 0)

                # Split the big tensor into num_splits packs. In cases where the
                # total size is not divisible by num_splits, the last pack gets
                # more elements.
                # TODO(zhengxq): it is also possible to optimize away all the concat
                # as well.
                num_splits = self.num_packs

                # The array_ops.size function will sometimes remove static shapes. So if
                # all gradient shapes are defined, we use another method to get the
                # total size.
                # TODO(yuefengz): move this logic to array_ops.size.
                if all(g.shape.is_fully_defined()
                       for g, _ in device_grads_and_vars):
                    total_grad_size = sum([
                        g.shape.num_elements()
                        for g, _ in device_grads_and_vars
                    ])
                else:
                    total_grad_size = array_ops.size(concat_grads)

                split_size = total_grad_size // num_splits
                split_size_last = total_grad_size - split_size * (num_splits -
                                                                  1)
                split_sizes = [split_size] * (num_splits - 1) + [
                    split_size_last
                ]
                grad_packs = array_ops.split(concat_grads, split_sizes)

                # Ready to aggregate the repacked gradients, with fake variables.
                # TODO(zhengxq): It is hacky to have to use fake variables.
                # We should remove the need for variables in
                # aggregate_gradients_using*.
                device_grad_packs.append(zip(grad_packs, [None] * num_splits))
                self.all_device_shapes.append(device_shapes)
                self.all_device_sizes.append(device_sizes)

        return device_grad_packs
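# A hypothetical unpack counterpart (not shown in this snippet; names are
# assumptions): concatenate the aggregated packs per device, split by the
# remembered sizes, and reshape each gradient back to its original shape.
    def unpack(self, aggregated_device_grad_packs):
        aggregated_device_grads = []
        for (device_packs, device_shapes, device_sizes) in zip(
                aggregated_device_grad_packs, self.all_device_shapes,
                self.all_device_sizes):
            device_packs = list(device_packs)
            with ops.colocate_with(device_packs[0][0]):
                # Reverse the concat done in pack(): one flat tensor per device.
                flat = array_ops.concat([g for g, _ in device_packs], 0)
                grads = [
                    array_ops.reshape(piece, shape)
                    for piece, shape in zip(
                        array_ops.split(flat, device_sizes), device_shapes)
                ]
                aggregated_device_grads.append(grads)
        return aggregated_device_grads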
Example #41
  def call(self, inputs, state):
    """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, `[batch, num_units]`.
      state: if `state_is_tuple` is False, this must be a state Tensor,
        `2-D, [batch, state_size]`.  If `state_is_tuple` is True, this must be a
        tuple of state Tensors, both `2-D`, with column sizes `c_state` and
        `m_state`.

    Returns:
      A tuple containing:

      - A `2-D, [batch, output_dim]`, Tensor representing the output of the
        LSTM after reading `inputs` when previous state was `state`.
        Here output_dim is:
           num_proj if num_proj was set,
           num_units otherwise.
      - Tensor(s) representing the new state of LSTM after reading `inputs` when
        the previous state was `state`.  Same type and shape(s) as `state`.

    Raises:
      ValueError: If input size cannot be inferred from inputs via
        static shape inference.
    """
    num_proj = self._num_units if self._num_proj is None else self._num_proj
    sigmoid = math_ops.sigmoid

    if self._state_is_tuple:
      (c_prev, m_prev) = state
    else:
      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    lstm_matrix = math_ops.matmul(
        array_ops.concat([inputs, m_prev], 1), self._masked_kernel)
    lstm_matrix = nn_ops.bias_add(lstm_matrix, self._bias)

    i, j, f, o = array_ops.split(
        value=lstm_matrix, num_or_size_splits=4, axis=1)
    # Diagonal connections
    if self._use_peepholes:
      c = (
          sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
          sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
    else:
      c = (
          sigmoid(f + self._forget_bias) * c_prev +
          sigmoid(i) * self._activation(j))

    if self._cell_clip is not None:
      # pylint: disable=invalid-unary-operand-type
      c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
      # pylint: enable=invalid-unary-operand-type
    if self._use_peepholes:
      m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
    else:
      m = sigmoid(o) * self._activation(c)

    if self._num_proj is not None:
      m = math_ops.matmul(m, self._proj_kernel)

      if self._proj_clip is not None:
        # pylint: disable=invalid-unary-operand-type
        m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
        # pylint: enable=invalid-unary-operand-type

    new_state = (
        tf_rnn.LSTMStateTuple(c, m)
        if self._state_is_tuple else array_ops.concat([c, m], 1))
    return m, new_state
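# The `_masked_kernel` above is understood to come from weight pruning; a
# minimal sketch of how such a kernel is typically formed (the threshold and
# all names are assumptions, not this cell's actual pruning logic):
import tensorflow as tf

kernel = tf.random.normal([10, 40])
mask = tf.cast(tf.abs(kernel) > 0.5, kernel.dtype)  # keep large weights only
masked_kernel = kernel * mask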
Example #42
 def odd_input(off, size):
     helper, off = array_ops.split(off, [1, size - 1], 1)
     size -= 1
     off = even_input(off, size)
     off = array_ops.concat([helper, off], 1)
     return off
Example #43
 def split_fn(inp, num_shards, axis, name):
     with ops.colocate_with(inp):
         return array_ops.split(inp,
                                num_shards,
                                axis=axis,
                                name=name)
Example #44
    def batch_all_reduce(self,
                         input_tensor_packs,
                         communication_hint='AUTO',
                         timeout=0):
        """Batch all-reduce dense tensors.

    This takes a list of batches of tensors. Using multiple batches has the
    benefit that the all-reduce does not need to wait for all inputs to be
    ready before it can start.

    This can be called in eager mode if an async executor is supplied when
    creating the launcher.

    Args:
      input_tensor_packs: a list of lists of dense tensors.
      communication_hint: string providing hint to runtime for choosing
        collective implementation.
      timeout: a float. The timeout in seconds.

    Returns:
      A flat list of reduced tensors.
    """
        # We don't batch with concat in eager. It's easy to get it wrong because
        # we need to avoid any numpy() calls on values produced by the async
        # executor. This effectively disables batching in eager, but it's unlikely
        # to all-reduce a large number of tensors in eager.
        batch_with_concat = (not self._use_scoped_allocator()
                             and not context.executing_eagerly())
        outputs = []
        for pack in input_tensor_packs:
            # TODO(b/169168846): inserts a parallel all_gather to verify packings
            # are the same on each replica.
            if batch_with_concat:
                with ops.device(self._device):
                    flat_tensors = [array_ops.reshape(t, [-1]) for t in pack]
                    shapes = [array_ops.shape(t) for t in pack]
                    if communication_hint == 'NCCL' and outputs:
                        control_input = outputs[-1]
                    else:
                        control_input = None
                    reduced = self.all_reduce(
                        array_ops.concat(flat_tensors, axis=0), control_input,
                        communication_hint, timeout)
                    num_elements = [math_ops.reduce_prod(s) for s in shapes]
                    flat_outputs = array_ops.split(reduced,
                                                   num_elements,
                                                   axis=0)
                    for shape, flat_output in zip(shapes, flat_outputs):
                        outputs.append(array_ops.reshape(flat_output, shape))
            else:
                # By placing all CollectiveReduce ops in a batch under single name
                # scope, we ensure they will be picked up by the `ScopedAllocator`
                # grappler optimizer and packed into a single all-reduce.
                with ops.name_scope('allreduce'):
                    for input_tensor in pack:
                        if communication_hint == 'NCCL' and outputs:
                            control_input = outputs[-1]
                        else:
                            control_input = None
                        outputs.append(
                            self.all_reduce(input_tensor, control_input,
                                            communication_hint, timeout))

        return outputs
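# A minimal sketch of the concat-then-split batching above, with made-up
# shapes and a multiply standing in for the collective all-reduce:
import tensorflow as tf

pack = [tf.ones([2, 3]), tf.ones([4])]
shapes = [t.shape for t in pack]
flat = tf.concat([tf.reshape(t, [-1]) for t in pack], axis=0)  # shape [10]
reduced = flat * 2.0                     # stand-in for self.all_reduce(...)
num_elements = [s.num_elements() for s in shapes]
outputs = [tf.reshape(piece, shape)
           for piece, shape in zip(tf.split(reduced, num_elements), shapes)]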
Example #45
def frame(signal, frame_length, frame_step, pad_end=False, pad_value=0, axis=-1,
          name=None):
  """Expands `signal`'s `axis` dimension into frames of `frame_length`.

  Slides a window of size `frame_length` over `signal`'s `axis` dimension
  with a stride of `frame_step`, replacing the `axis` dimension with
  `[frames, frame_length]` frames.

  If `pad_end` is True, window positions that are past the end of the `axis`
  dimension are padded with `pad_value` until the window moves fully past the
  end of the dimension. Otherwise, only window positions that fully overlap the
  `axis` dimension are produced.

  For example:

  ```python
  # A batch size 3 tensor of 9152 audio samples.
  audio = tf.random.normal([3, 9152])

  # Compute overlapping frames of length 512 with a step of 180 (frames
  # overlap by 332 samples). By default, only 49 frames are generated, since
  # 1 + (9152 - 512) // 180 = 49 and a 50th full frame would run past the
  # end of the signal.
  frames = tf.signal.frame(audio, 512, 180)
  frames.shape.assert_is_compatible_with([3, 49, 512])

  # When pad_end is enabled, the final two partial frames are kept (padded
  # with zeros), for a total of ceil(9152 / 180) = 51 frames.
  frames = tf.signal.frame(audio, 512, 180, pad_end=True)
  frames.shape.assert_is_compatible_with([3, 51, 512])
  ```

  Args:
    signal: A `[..., samples, ...]` `Tensor`. The rank and dimensions
      may be unknown. Rank must be at least 1.
    frame_length: The frame length in samples. An integer or scalar `Tensor`.
    frame_step: The frame hop size in samples. An integer or scalar `Tensor`.
    pad_end: Whether to pad the end of `signal` with `pad_value`.
    pad_value: An optional scalar `Tensor` to use where the input signal
      does not exist when `pad_end` is True.
    axis: A scalar integer `Tensor` indicating the axis to frame. Defaults to
      the last axis. Supports negative values for indexing from the end.
    name: An optional name for the operation.

  Returns:
    A `Tensor` of frames with shape `[..., frames, frame_length, ...]`.

  Raises:
    ValueError: If `frame_length`, `frame_step`, `pad_value`, or `axis` are not
      scalar.
  """
  with ops.name_scope(name, "frame", [signal, frame_length, frame_step,
                                      pad_value]):
    signal = ops.convert_to_tensor(signal, name="signal")
    frame_length = ops.convert_to_tensor(frame_length, name="frame_length")
    frame_step = ops.convert_to_tensor(frame_step, name="frame_step")
    axis = ops.convert_to_tensor(axis, name="axis")

    signal.shape.with_rank_at_least(1)
    frame_length.shape.assert_has_rank(0)
    frame_step.shape.assert_has_rank(0)
    axis.shape.assert_has_rank(0)

    result_shape = _infer_frame_shape(signal, frame_length, frame_step, pad_end,
                                      axis)

    def maybe_constant(val):
      val_static = tensor_util.constant_value(val)
      return (val_static, True) if val_static is not None else (val, False)

    signal_shape, signal_shape_is_static = maybe_constant(
        array_ops.shape(signal))
    axis, axis_is_static = maybe_constant(axis)

    if signal_shape_is_static and axis_is_static:
      # Axis can be negative. Convert it to positive.
      axis = range(len(signal_shape))[axis]
      outer_dimensions, length_samples, inner_dimensions = np.split(
          signal_shape, indices_or_sections=[axis, axis + 1])
      length_samples = length_samples.item()
    else:
      signal_rank = array_ops.rank(signal)
      # Axis can be negative. Convert it to positive.
      axis = math_ops.range(signal_rank)[axis]
      outer_dimensions, length_samples, inner_dimensions = array_ops.split(
          signal_shape, [axis, 1, signal_rank - 1 - axis])
      length_samples = array_ops.reshape(length_samples, [])
    num_outer_dimensions = array_ops.size(outer_dimensions)
    num_inner_dimensions = array_ops.size(inner_dimensions)

    # If padding is requested, pad the input signal tensor with pad_value.
    if pad_end:
      pad_value = ops.convert_to_tensor(pad_value, signal.dtype)
      pad_value.shape.assert_has_rank(0)

      # Calculate number of frames, using double negatives to round up.
      num_frames = -(-length_samples // frame_step)

      # Pad the signal by up to frame_length samples based on how many samples
      # are remaining starting from last_frame_position.
      pad_samples = math_ops.maximum(
          0, frame_length + frame_step * (num_frames - 1) - length_samples)

      # Pad the inner dimension of signal by pad_samples.
      paddings = array_ops.concat(
          [array_ops.zeros([num_outer_dimensions, 2], dtype=pad_samples.dtype),
           [[0, pad_samples]],
           array_ops.zeros([num_inner_dimensions, 2], dtype=pad_samples.dtype)],
          0)
      signal = array_ops.pad(signal, paddings, constant_values=pad_value)

      signal_shape = array_ops.shape(signal)
      length_samples = signal_shape[axis]
    else:
      num_frames = math_ops.maximum(
          0, 1 + (length_samples - frame_length) // frame_step)

    subframe_length, _ = maybe_constant(util_ops.gcd(frame_length, frame_step))
    subframes_per_frame = frame_length // subframe_length
    subframes_per_hop = frame_step // subframe_length
    num_subframes = length_samples // subframe_length

    slice_shape = array_ops.concat([outer_dimensions,
                                    [num_subframes * subframe_length],
                                    inner_dimensions], 0)
    subframe_shape = array_ops.concat([outer_dimensions,
                                       [num_subframes, subframe_length],
                                       inner_dimensions], 0)
    subframes = array_ops.reshape(array_ops.strided_slice(
        signal, array_ops.zeros_like(signal_shape),
        slice_shape), subframe_shape)

    # frame_selector is a [num_frames, subframes_per_frame] tensor
    # that indexes into the appropriate frame in subframes. For example:
    # [[0, 0, 0, 0], [2, 2, 2, 2], [4, 4, 4, 4]]
    frame_selector = array_ops.reshape(
        math_ops.range(num_frames) * subframes_per_hop, [num_frames, 1])

    # subframe_selector is a [num_frames, subframes_per_frame] tensor
    # that indexes into the appropriate subframe within a frame. For example:
    # [[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]]
    subframe_selector = array_ops.reshape(
        math_ops.range(subframes_per_frame), [1, subframes_per_frame])

    # Adding the 2 selector tensors together produces a [num_frames,
    # subframes_per_frame] tensor of indices to use with tf.gather to select
    # subframes from subframes. We then reshape the inner-most
    # subframes_per_frame dimension to stitch the subframes together into
    # frames. For example: [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7]].
    selector = frame_selector + subframe_selector

    frames = array_ops.reshape(
        array_ops.gather(subframes, selector, axis=axis),
        array_ops.concat([outer_dimensions, [num_frames, frame_length],
                          inner_dimensions], 0))

    if result_shape:
      frames.set_shape(result_shape)
    return frames
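# A tiny worked example (eager TF 2.x assumed) of the subframe trick above:
# frame_length=4, frame_step=2 gives subframe_length = gcd(4, 2) = 2, so each
# frame is 2 subframes and each hop advances 1 subframe.
import tensorflow as tf

signal = tf.range(8)                    # [0 1 2 3 4 5 6 7]
subframes = tf.reshape(signal, [4, 2])  # 4 subframes of length 2
selector = (tf.reshape(tf.range(3), [3, 1]) +  # frame_selector (hop = 1)
            tf.reshape(tf.range(2), [1, 2]))   # subframe_selector
frames = tf.reshape(tf.gather(subframes, selector, axis=0), [3, 4])
# frames == [[0 1 2 3], [2 3 4 5], [4 5 6 7]]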
Example #46
    def __call__(self, inputs, state, scope=None):
        """Run one step of G-LSTM.

        Args:
          inputs: input Tensor, 2D, batch x num_units.
          state: this must be a tuple of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`.
          scope: not used

        Returns:
          A tuple containing:

          - A `2-D, [batch x output_dim]`, Tensor representing the output of the
            G-LSTM after reading `inputs` when previous state was `state`.
            Here output_dim is:
               num_proj if num_proj was set,
               num_units otherwise.
          - Tensor(s) representing the new state of G-LSTM after reading `inputs` when
            the previous state was `state`.  Same type and shape(s) as `state`.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
        """
        (c_prev, m_prev) = state

        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
        dtype = inputs.dtype
        with vs.variable_scope(scope or "glstm_cell",
                               initializer=self._initializer):
            i_parts = []
            j_parts = []
            f_parts = []
            o_parts = []

            for group_id in xrange(self._number_of_groups):
                with vs.variable_scope("group%d"%group_id):
                    x_g_id = array_ops.concat([self._get_input_for_group(inputs, group_id, self._group_shape[0]),
                                               self._get_input_for_group(m_prev, group_id, self._group_shape[0])], axis=1)
                    R_k = linear(x_g_id, 4 * self._group_shape[1], bias=False, scope=scope) #will add per gate biases later
                    i_k, j_k, f_k, o_k = array_ops.split(R_k, 4, 1)

                i_parts.append(i_k)
                j_parts.append(j_k)
                f_parts.append(f_k)
                o_parts.append(o_k)

            # it is more efficient to have per-gate biases than per-gate, per-group biases
            bi = vs.get_variable(name="biases_i",
                                 shape=[self._num_units],
                                 dtype=dtype,
                                 initializer=init_ops.constant_initializer(0.0, dtype=dtype))
            bj = vs.get_variable(name="biases_j",
                                 shape=[self._num_units],
                                 dtype=dtype,
                                 initializer=init_ops.constant_initializer(0.0, dtype=dtype))
            bf = vs.get_variable(name="biases_f",
                                 shape=[self._num_units],
                                 dtype=dtype,
                                 initializer=init_ops.constant_initializer(0.0, dtype=dtype))
            bo = vs.get_variable(name="biases_o",
                                 shape=[self._num_units],
                                 dtype=dtype,
                                 initializer=init_ops.constant_initializer(0.0, dtype=dtype))

            i = nn_ops.bias_add(array_ops.concat(i_parts, axis=1), bi)
            j = nn_ops.bias_add(array_ops.concat(j_parts, axis=1), bj)
            f = nn_ops.bias_add(array_ops.concat(f_parts, axis=1), bf)
            o = nn_ops.bias_add(array_ops.concat(o_parts, axis=1), bo)

        c = math_ops.sigmoid(f + self._forget_bias) * c_prev + math_ops.sigmoid(i) * math_ops.tanh(j)
        m = math_ops.sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            with vs.variable_scope("projection"):
                m = linear(m, self._num_proj, bias=False, scope=scope)

        new_state = LSTMStateTuple(c, m)
        return m, new_state
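# The `_get_input_for_group` helper used above is not shown in this snippet;
# a minimal sketch of the column slicing it is understood to perform
# (signature assumed):
    def _get_input_for_group(self, inputs, group_id, group_size):
        # Take this group's contiguous block of columns; -1 keeps the batch dim.
        return array_ops.slice(inputs,
                               begin=[0, group_id * group_size],
                               size=[-1, group_size])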
Example #47
    def call(self, inputs, states, constants, training=False):
        org = inputs
        constants = states + list(constants)
        inputs = self.input_norm(inputs)
        h_tm1 = constants[0]  # previous memory
        # dropout masks for the input and recurrent transformations
        dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=3)
        if 0. < self.dropout < 1.:
            inputs = inputs * dp_mask[0]
        matrix_x = K.dot(inputs, self.kernel)
        matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
        if self.use_bias:
            matrix_x = K.bias_add(matrix_x, self.bias)
        x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=-1)
        recurrent_z, recurrent_r, recurrent_h = array_ops.split(matrix_inner,
                                                                3,
                                                                axis=-1)
        if constants[1] is not None:
            attention_context = self.heads_attention_wrapper(
                h_tm1, constants, training)
            att_z, att_r, att_h = array_ops.split(attention_context,
                                                  3,
                                                  axis=-1)
            org_z = x_z + recurrent_z + att_z
            z = self.z_heads_filter(self.z_layer_norm(org_z))
            if 0. < self.dropout < 1.:
                z = z * dp_mask[0]
            z = z + org_z

            r = x_r + recurrent_r + att_r
            r = self.r_heads_filter(r)
            org_hh = x_h + r * recurrent_h + att_h
            hh = self.h_heads_filter(self.hh_layer_norm(org_hh))
            if 0. < self.dropout < 1.:
                hh = hh * dp_mask[0]
            hh = hh + org_hh
        else:
            z = self.z_heads_filter(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)

            hh = self.h_heads_filter(x_h + r * recurrent_h)
        # previous and candidate state mixed by update gate
        h = z * h_tm1 + (1 - z) * hh
        if training:
            h = h * dp_mask[0]
        h = org + h
        return h, [h]
Example #48
def linear_to_mel_weight_matrix(num_mel_bins=20,
                                num_spectrogram_bins=129,
                                sample_rate=8000,
                                lower_edge_hertz=125.0,
                                upper_edge_hertz=3800.0,
                                dtype=dtypes.float32,
                                name=None):
    """Returns a matrix to warp linear scale spectrograms to the [mel scale][mel].

  Returns a weight matrix that can be used to re-weight a `Tensor` containing
  `num_spectrogram_bins` linearly sampled frequency information from
  `[0, sample_rate / 2]` into `num_mel_bins` frequency information from
  `[lower_edge_hertz, upper_edge_hertz]` on the [mel scale][mel].

  For example, the returned matrix `A` can be used to right-multiply a
  spectrogram `S` of shape `[frames, num_spectrogram_bins]` of linear
  scale spectrum values (e.g. STFT magnitudes) to generate a "mel spectrogram"
  `M` of shape `[frames, num_mel_bins]`.

      # `S` has shape [frames, num_spectrogram_bins]
      # `M` has shape [frames, num_mel_bins]
      M = tf.matmul(S, A)

  The matrix can be used with `tf.tensordot` to convert an arbitrary rank
  `Tensor` of linear-scale spectral bins into the mel scale.

      # S has shape [..., num_spectrogram_bins].
      # M has shape [..., num_mel_bins].
      M = tf.tensordot(S, A, 1)
      # tf.tensordot does not support shape inference for this case yet.
      M.set_shape(S.shape[:-1].concatenate(A.shape[-1:]))

  Args:
    num_mel_bins: Python int. How many bands in the resulting mel spectrum.
    num_spectrogram_bins: An integer `Tensor`. How many bins there are in the
      source spectrogram data, which is understood to be `fft_size // 2 + 1`,
      i.e. the spectrogram only contains the nonredundant FFT bins.
    sample_rate: Python float. Samples per second of the input signal used to
      create the spectrogram. We need this to figure out the actual frequencies
      for each spectrogram bin, which dictates how they are mapped into the mel
      scale.
    lower_edge_hertz: Python float. Lower bound on the frequencies to be
      included in the mel spectrum. This corresponds to the lower edge of the
      lowest triangular band.
    upper_edge_hertz: Python float. The desired top edge of the highest
      frequency band.
    dtype: The `DType` of the result matrix. Must be a floating point type.
    name: An optional name for the operation.

  Returns:
    A `Tensor` of shape `[num_spectrogram_bins, num_mel_bins]`.

  Raises:
    ValueError: If num_mel_bins/num_spectrogram_bins/sample_rate are not
      positive, lower_edge_hertz is negative, frequency edges are incorrectly
      ordered, or upper_edge_hertz is larger than the Nyquist frequency.

  [mel]: https://en.wikipedia.org/wiki/Mel_scale
  """
    with ops.name_scope(name, 'linear_to_mel_weight_matrix') as name:
        # Note: As num_spectrogram_bins is passed to `math_ops.linspace`
        # and the validation is already done in linspace (both in shape function
        # and in kernel), there is no need to validate num_spectrogram_bins here.
        _validate_arguments(num_mel_bins, sample_rate, lower_edge_hertz,
                            upper_edge_hertz, dtype)

        # This function can be constant folded by graph optimization since there are
        # no Tensor inputs.
        sample_rate = ops.convert_to_tensor(sample_rate,
                                            dtype,
                                            name='sample_rate')
        lower_edge_hertz = ops.convert_to_tensor(lower_edge_hertz,
                                                 dtype,
                                                 name='lower_edge_hertz')
        upper_edge_hertz = ops.convert_to_tensor(upper_edge_hertz,
                                                 dtype,
                                                 name='upper_edge_hertz')
        zero = ops.convert_to_tensor(0.0, dtype)

        # HTK excludes the spectrogram DC bin.
        bands_to_zero = 1
        nyquist_hertz = sample_rate / 2.0
        linear_frequencies = math_ops.linspace(
            zero, nyquist_hertz, num_spectrogram_bins)[bands_to_zero:]
        spectrogram_bins_mel = array_ops.expand_dims(
            _hertz_to_mel(linear_frequencies), 1)

        # Compute num_mel_bins triples of (lower_edge, center, upper_edge). The
        # center of each band is the lower and upper edge of the adjacent bands.
        # Accordingly, we divide [lower_edge_hertz, upper_edge_hertz] into
        # num_mel_bins + 2 pieces.
        band_edges_mel = shape_ops.frame(math_ops.linspace(
            _hertz_to_mel(lower_edge_hertz), _hertz_to_mel(upper_edge_hertz),
            num_mel_bins + 2),
                                         frame_length=3,
                                         frame_step=1)

        # Split the triples up and reshape them into [1, num_mel_bins] tensors.
        lower_edge_mel, center_mel, upper_edge_mel = tuple(
            array_ops.reshape(t, [1, num_mel_bins])
            for t in array_ops.split(band_edges_mel, 3, axis=1))

        # Calculate lower and upper slopes for every spectrogram bin.
        # Line segments are linear in the mel domain, not Hertz.
        lower_slopes = (spectrogram_bins_mel -
                        lower_edge_mel) / (center_mel - lower_edge_mel)
        upper_slopes = (upper_edge_mel -
                        spectrogram_bins_mel) / (upper_edge_mel - center_mel)

        # Intersect the line segments with each other and zero.
        mel_weights_matrix = math_ops.maximum(
            zero, math_ops.minimum(lower_slopes, upper_slopes))

        # Re-add the zeroed lower bins we sliced out above.
        return array_ops.pad(mel_weights_matrix, [[bands_to_zero, 0], [0, 0]],
                             name=name)
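# The `_hertz_to_mel` helper referenced above is not shown in this snippet;
# a minimal sketch of the HTK-style mel mapping it is understood to compute
# (constants follow mel = 1127 * ln(1 + f / 700); names are assumptions):
_MEL_BREAK_FREQUENCY_HERTZ = 700.0
_MEL_HIGH_FREQUENCY_Q = 1127.0

def _hertz_to_mel(frequencies_hertz):
    """Converts frequencies in Hertz to the mel scale."""
    return _MEL_HIGH_FREQUENCY_Q * math_ops.log(
        1.0 + (frequencies_hertz / _MEL_BREAK_FREQUENCY_HERTZ))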
Example #49
 def _split(self, params):
     return array_ops.split(params, 2, axis=-1)
def inverse_mdct(mdcts,
                 window_fn=window_ops.vorbis_window,
                 norm=None,
                 name=None):
  """Computes the inverse modified DCT of `mdcts`.

  To reconstruct an original waveform, the same window function should
  be used with `mdct` and `inverse_mdct`.

  Example usage:

  >>> @tf.function
  ... def compare_round_trip():
  ...   samples = 1000
  ...   frame_length = 400
  ...   halflen = frame_length // 2
  ...   waveform = tf.random.normal(dtype=tf.float32, shape=[samples])
  ...   waveform_pad = tf.pad(waveform, [[halflen, 0],])
  ...   mdct = tf.signal.mdct(waveform_pad, frame_length, pad_end=True,
  ...                         window_fn=tf.signal.vorbis_window)
  ...   inverse_mdct = tf.signal.inverse_mdct(mdct,
  ...                                         window_fn=tf.signal.vorbis_window)
  ...   inverse_mdct = inverse_mdct[halflen: halflen + samples]
  ...   return waveform, inverse_mdct
  >>> waveform, inverse_mdct = compare_round_trip()
  >>> np.allclose(waveform.numpy(), inverse_mdct.numpy(), rtol=1e-3, atol=1e-4)
  True

  Implemented with TPU/GPU-compatible ops and supports gradients.

  Args:
    mdcts: A `float32`/`float64` `[..., frames, frame_length // 2]`
      `Tensor` of MDCT bins representing a batch of `frame_length // 2`-point
      MDCTs.
    window_fn: A callable that takes a frame_length and a `dtype` keyword
      argument and returns a `[frame_length]` `Tensor` of samples in the
      provided datatype. If set to `None`, a rectangular window with a scale of
      1/sqrt(2) is used. For perfect reconstruction of a signal from `mdct`
      followed by `inverse_mdct`, please use `tf.signal.vorbis_window`,
      `tf.signal.kaiser_bessel_derived_window` or `None`. If using another
      window function, make sure that w[n]^2 + w[n + frame_length // 2]^2 = 1
      and w[n] = w[frame_length - n - 1] for n = 0,...,frame_length // 2 - 1 to
      achieve perfect reconstruction.
    norm: If "ortho", orthonormal inverse DCT4 is performed, if it is None,
      a regular dct4 followed by scaling of `1/frame_length` is performed.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `Tensor` of `float32`/`float64` signals representing
    the inverse MDCT for each input MDCT in `mdcts` where `samples` is
    `(frames - 1) * (frame_length // 2) + frame_length`.

  Raises:
    ValueError: If `mdcts` is not at least rank 2.

  [mdct]: https://en.wikipedia.org/wiki/Modified_discrete_cosine_transform
  """
  with ops.name_scope(name, 'inverse_mdct', [mdcts]):
    mdcts = ops.convert_to_tensor(mdcts, name='mdcts')
    mdcts.shape.with_rank_at_least(2)
    half_len = math_ops.cast(mdcts.shape[-1], dtype=dtypes.int32)

    if norm is None:
      half_len_float = math_ops.cast(half_len, dtype=mdcts.dtype)
      result_idct4 = (0.5 / half_len_float) * dct_ops.dct(mdcts, type=4)
    elif norm == 'ortho':
      result_idct4 = dct_ops.dct(mdcts, type=4, norm='ortho')
    split_result = array_ops.split(result_idct4, 2, axis=-1)
    real_frames = array_ops.concat((split_result[1],
                                    -array_ops.reverse(split_result[1], [-1]),
                                    -array_ops.reverse(split_result[0], [-1]),
                                    -split_result[0]), axis=-1)

    # Optionally window and overlap-add the inner 2 dimensions of real_frames
    # into a single [samples] dimension.
    if window_fn is not None:
      window = window_fn(2 * half_len, dtype=mdcts.dtype)
      real_frames *= window
    else:
      real_frames *= 1.0 / np.sqrt(2)
    return reconstruction_ops.overlap_and_add(real_frames, half_len)
Example #51
 def _bag_features(self, tree_num, input_data):
     split_data = array_ops.split(1, self.params.num_features, input_data)
     return array_ops.concat(
         1,
         [split_data[ind] for ind in self.params.bagged_features[tree_num]])
def mdct(signals, frame_length, window_fn=window_ops.vorbis_window,
         pad_end=False, norm=None, name=None):
  """Computes the [Modified Discrete Cosine Transform][mdct] of `signals`.

  Implemented with TPU/GPU-compatible ops and supports gradients.

  Args:
    signals: A `[..., samples]` `float32`/`float64` `Tensor` of real-valued
      signals.
    frame_length: An integer scalar `Tensor`. The window length in samples
      which must be divisible by 4.
    window_fn: A callable that takes a frame_length and a `dtype` keyword
      argument and returns a `[frame_length]` `Tensor` of samples in the
      provided datatype. If set to `None`, a rectangular window with a scale of
      1/sqrt(2) is used. For perfect reconstruction of a signal from `mdct`
      followed by `inverse_mdct`, please use `tf.signal.vorbis_window`,
      `tf.signal.kaiser_bessel_derived_window` or `None`. If using another
      window function, make sure that w[n]^2 + w[n + frame_length // 2]^2 = 1
      and w[n] = w[frame_length - n - 1] for n = 0,...,frame_length // 2 - 1 to
      achieve perfect reconstruction.
    pad_end: Whether to pad the end of `signals` with zeros when the provided
      frame length and step produces a frame that lies partially past its end.
    norm: If None, an unnormalized DCT-IV is used; if "ortho", an
      orthonormal DCT-IV is used.
    name: An optional name for the operation.

  Returns:
    A `[..., frames, frame_length // 2]` `Tensor` of `float32`/`float64`
    MDCT values where `frames` is roughly `samples // (frame_length // 2)`
    when `pad_end=False`.

  Raises:
    ValueError: If `signals` is not at least rank 1, `frame_length` is
      not scalar, or `frame_length` is not a multiple of `4`.

  [mdct]: https://en.wikipedia.org/wiki/Modified_discrete_cosine_transform
  """
  with ops.name_scope(name, 'mdct', [signals, frame_length]):
    signals = ops.convert_to_tensor(signals, name='signals')
    signals.shape.with_rank_at_least(1)
    frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
    frame_length.shape.assert_has_rank(0)
    # Assert that frame_length is divisible by 4.
    frame_length_static = tensor_util.constant_value(frame_length)
    if frame_length_static is not None:
      if frame_length_static % 4 != 0:
        raise ValueError('The frame length must be a multiple of 4.')
      frame_step = ops.convert_to_tensor(frame_length_static // 2,
                                         dtype=frame_length.dtype)
    else:
      frame_step = frame_length // 2

    framed_signals = shape_ops.frame(
        signals, frame_length, frame_step, pad_end=pad_end)

    # Optionally window the framed signals.
    if window_fn is not None:
      window = window_fn(frame_length, dtype=framed_signals.dtype)
      framed_signals *= window
    else:
      framed_signals *= 1.0 / np.sqrt(2)

    split_frames = array_ops.split(framed_signals, 4, axis=-1)
    frame_firsthalf = -array_ops.reverse(split_frames[2],
                                         [-1]) - split_frames[3]
    frame_secondhalf = split_frames[0] - array_ops.reverse(split_frames[1],
                                                           [-1])
    frames_rearranged = array_ops.concat((frame_firsthalf, frame_secondhalf),
                                         axis=-1)
    # Below call produces the (frame_length // 2) unique components of the
    # type 4 orthonormal DCT of the real windowed signals in frames_rearranged.
    return dct_ops.dct(frames_rearranged, type=4, norm=norm)
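# A tiny numeric sketch of the pre-DCT rearrangement above for one frame of
# length 8 split into quarters a, b, c, d: the DCT-IV input is
# [-reverse(c) - d, a - reverse(b)].
import numpy as np

frame = np.arange(8.0)            # a=[0,1], b=[2,3], c=[4,5], d=[6,7]
a, b, c, d = np.split(frame, 4)
rearranged = np.concatenate([-c[::-1] - d, a - b[::-1]])
# rearranged == [-11., -11., -3., -1.]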
Example #53
  def prediction_ops(self, times, values, exogenous_regressors):
    """Compute model predictions given input data.

    Args:
      times: A [batch size, self.window_size] integer Tensor, the first
          self.input_window_size times in each part of the batch indicating
          input features, and the last self.output_window_size times indicating
          prediction times.
      values: A [batch size, self.input_window_size, self.num_features] Tensor
          with input features.
      exogenous_regressors: A [batch size, self.window_size,
          self.exogenous_size] Tensor with exogenous features.
    Returns:
      Tuple (predicted_mean, predicted_covariance), where each element is a
      Tensor with shape [batch size, self.output_window_size,
      self.num_features].
    """
    times.get_shape().assert_is_compatible_with([None, self.window_size])
    batch_size = array_ops.shape(times)[0]
    if self.input_window_size:
      values.get_shape().assert_is_compatible_with(
          [None, self.input_window_size, self.num_features])
    if exogenous_regressors is not None:
      exogenous_regressors.get_shape().assert_is_compatible_with(
          [None, self.window_size, self.exogenous_size])
    # Create input features.
    input_window_features = []
    input_feature_size = 0
    output_window_features = []
    output_feature_size = 0
    if self._periodicities:
      _, time_features = self._compute_time_features(times)
      num_time_features = self._buckets * len(self._periodicities)
      time_features = array_ops.reshape(
          time_features,
          [batch_size,
           self.window_size,
           num_time_features])
      input_time_features, output_time_features = array_ops.split(
          time_features, (self.input_window_size, self.output_window_size),
          axis=1)
      input_feature_size += num_time_features
      output_feature_size += num_time_features
      input_window_features.append(input_time_features)
      output_window_features.append(output_time_features)
    if self.input_window_size:
      inp = array_ops.slice(values, [0, 0, 0], [-1, self.input_window_size, -1])
      input_window_features.append(
          array_ops.reshape(
              inp,
              [batch_size, self.input_window_size, self.num_features]))
      input_feature_size += self.num_features
    if self.exogenous_size:
      input_exogenous_features, output_exogenous_features = array_ops.split(
          exogenous_regressors,
          (self.input_window_size, self.output_window_size),
          axis=1)
      input_feature_size += self.exogenous_size
      output_feature_size += self.exogenous_size
      input_window_features.append(input_exogenous_features)
      output_window_features.append(output_exogenous_features)
    assert input_window_features
    input_window_features = array_ops.concat(input_window_features, axis=2)
    if output_window_features:
      output_window_features = array_ops.concat(output_window_features, axis=2)
    else:
      output_window_features = array_ops.zeros(
          [batch_size, self.output_window_size, 0],
          dtype=self.dtype)
    static_batch_size = times.get_shape().dims[0].value
    input_window_features.set_shape(
        [static_batch_size, self.input_window_size, input_feature_size])
    output_window_features.set_shape(
        [static_batch_size, self.output_window_size, output_feature_size])
    return self._output_window_predictions(input_window_features,
                                           output_window_features)
Example #54
    def __call__(
            self,
            inputs,  # the input carries both the x input and the time input t
            state,  # the state carries the cell state and the hidden state
            scope=None):
        """
            Phased long short-term memory cell (P-LSTM).

        """

        with vs.variable_scope(scope or type(self).__name__):

            # Parameters of gates are concatenated into one multiply for efficiency.
            # The initial state is a tuple (c, h).

            if isinstance(state, tuple):  # a tuple can be unpacked directly
                c_prev, h_prev = state
            else:  # otherwise state is a single tensor; split it along axis 1
                c_prev, h_prev = array_ops.split(value=state,
                                                 num_or_size_splits=2,
                                                 axis=1)

            # (2, batch_size, seq_len)

            # NB: here we explicitly give t as input.

            # inputs carries two components per example: the first is the x
            # input, the second is the time variable.

            x = tf.reshape(inputs[:, 0], (-1, 1))  # second dim is 1; first dim inferred

            # take the timestamp from the last example of the batch

            t = inputs[:, 1][
                -1]  # Now we only accept one id. We have a batch so it's a bit more complex.

            # maybe the information should come from the outside. To be defined later.
            # This is just a matrix multiplication.

            concat = _linear([x, h_prev], 4 * self._num_units,
                             True)  # no activation function applied yet
            # Note: stopping at the linear combination is deliberate, because
            # the peephole contributions can be added to it below.

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate

            i, j, f, o = array_ops.split(value=concat,
                                         num_or_size_splits=4,
                                         axis=1)

            dtype = inputs.dtype

            # Note that a mask applies to a particular time step, while tau,
            # r_on, s and the resulting k_t apply to a particular hidden or
            # cell-state unit.
            tau = vs.get_variable(
                'tau',
                shape=[self._num_units],  # one tau (period), r_on (open
                # ratio) and s (phase) per hidden / cell-state unit
                initializer=random_exp_initializer(0, self.tau_init),
                dtype=dtype)

            r_on = vs.get_variable('r_on',
                                   shape=[self._num_units],
                                   initializer=init_ops.constant_initializer(
                                       self.r_on_init),
                                   dtype=dtype)

            s = vs.get_variable(
                's',
                shape=[self._num_units],
                initializer=init_ops.random_uniform_initializer(
                    0., tau.initialized_value()),
                dtype=dtype)

            # tf.tile replicates t once per hidden unit.

            times = tf.tile(tf.reshape(t, [-1, 1]), [1, self._num_units])

            phase = phi(times, s, tau)  #  element-wise calculation

            kappa = time_gate_fast(phase, r_on, self._leak_rate,
                                   self._training_phase)

            w_o_peephole = None

            # With peephole connections, add the scaled cell state to the
            # linear combinations computed above.
            if self._use_peepholes:

                w_i_peephole = vs.get_variable('W_I_peephole',
                                               shape=[self._num_units],
                                               dtype=dtype)

                w_f_peephole = vs.get_variable('W_F_peephole',
                                               shape=[self._num_units],
                                               dtype=dtype)

                w_o_peephole = vs.get_variable('W_O_peephole',
                                               shape=[self._num_units],
                                               dtype=dtype)

                f += w_f_peephole * c_prev
                i += w_i_peephole * c_prev

            new_c_tilde = sigmoid(f) * c_prev + sigmoid(i) * self._activation(
                j)

            if self._use_peepholes:
                o += w_o_peephole * new_c_tilde

            new_h_tilde = sigmoid(o) * self._activation(new_c_tilde)
            """
            Hi all,
            Yes, Philippe, you are correct in that Equation 4 should reference c_tilde and not c.
            I can add a point to the paper to mention that, and will update Figure 1 so the line is
            correctly drawn to c_tilde instead. The intuition here is that the gates should be blind
            to the effect of the khronos gate; input, forget and output gate should all operate as if
            the cell were a normal LSTM cell, while the khronos gate allows it to either operate or
            not operate (and then linearly interpolates between these two states). If the output gate
            is influenced by the khronos gate (if the peepholes reference c instead of c_tilde), then
            the PLSTM would no longer be a gated LSTM cell, but somehow be self-dependent on the time gate's actual operation.
            I think everyone's right in that it wouldn't influence much -- but it should be updated in
            the paper. Thanks very much for pointing out the issue, Philippe!
            -Danny"""

            # Apply Khronos gate

            new_h = kappa * new_h_tilde + (1 - kappa) * h_prev

            new_c = kappa * new_c_tilde + (1 - kappa) * c_prev

            new_state = (new_c, new_h)

            # The cell state is updated according to the sampling frequency.
            return new_h, new_state
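
Addendum (not from the original example): `phi`, `time_gate_fast` and `random_exp_initializer` are referenced above but not shown. Below is a minimal sketch following the Phased LSTM paper (Neil et al., 2016); treat it as an assumption about the missing helpers (it also assumes `self._training_phase` is a plain Python bool), not as the original code.

import tensorflow as tf

def phi(times, s, tau):
    # Cyclic phase in [0, 1): position of each timestamp within its period.
    return tf.mod(times - s, tau) / tau

def time_gate_fast(phase, r_on, leak_rate, training_phase):
    # Piecewise-linear openness k: rises over the first half of the open
    # fraction r_on, falls over the second half, and leaks with slope
    # leak_rate while closed (the leak is usually disabled at test time).
    up = 2.0 * phase / r_on
    down = 2.0 - up
    leak = leak_rate * phase if training_phase else 0.0 * phase
    return tf.where(phase < 0.5 * r_on, up,
                    tf.where(phase < r_on, down, leak))

def random_exp_initializer(minval, maxval, dtype=tf.float32):
    # Initializes each period as exp(U(minval, maxval)).
    def _initializer(shape, dtype=dtype, partition_info=None):
        return tf.exp(tf.random_uniform(shape, minval, maxval, dtype=dtype))
    return _initializer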
Example #55
  def _process_input_helper(self, update_row_factors,
                            sp_input=None, transpose_input=False,
                            row_weights=None):
    """Creates the graph for processing a sparse slice of input.

    Args:
      update_row_factors: if True, update or project the row_factors, else
        update or project the column factors.
      sp_input: Please refer to comments for update_row_factors,
        update_col_factors, project_row_factors, and project_col_factors for
        restrictions.
      transpose_input: If True, the input is logically transposed and then the
        corresponding rows/columns of the transposed input are updated.
      row_weights: If not None, this is the row/column weights to be used for
        the update or projection. If None, use the corresponding weights from
        the model. Note that the feature (column/row) weights will be
        determined by the model. When not None, it can either be a scalar or
        a rank-1 tensor with the same number of elements as the number of rows
        or columns to be updated/projected.

    Returns:
      A tuple consisting of the following three elements:
      new_values: New values for the row/column factors.
      update_op: An op that assigns the newly computed values to the row/column
        factors.
      loss: A tensor (scalar) that contains the normalized minibatch loss,
        corresponding to sp_input.
    """
    assert isinstance(sp_input, sparse_tensor.SparseTensor)

    if update_row_factors:
      left = self._row_factors
      right_factors = self._col_factors_cache
      row_wt = self._row_wt_cache
      col_wt = self._col_wt_cache
      total_rows = self._input_rows
      sharding_func = WALSModel._get_sharding_func(self._input_rows,
                                                   self._num_row_shards)
      gramian = self._col_gramian_cache
    else:
      left = self._col_factors
      right_factors = self._row_factors_cache
      row_wt = self._col_wt_cache
      col_wt = self._row_wt_cache
      total_rows = self._input_cols
      sharding_func = WALSModel._get_sharding_func(self._input_cols,
                                                   self._num_col_shards)
      gramian = self._row_gramian_cache
      transpose_input = not transpose_input

    # Note that the row indices of sp_input are based on the original full
    # input. Here we reindex the rows and give them contiguous ids starting at 0.
    # We use tf.unique to achieve this reindexing. Note that this is done so
    # that the downstream kernel can assume that the input is "dense" along the
    # row dimension.
    row_ids, col_ids = array_ops.split(
        value=sp_input.indices, num_or_size_splits=2, axis=1)
    update_row_indices, all_row_ids = array_ops.unique(row_ids[:, 0])
    update_col_indices, all_col_ids = array_ops.unique(col_ids[:, 0])
    col_ids = array_ops.expand_dims(math_ops.cast(all_col_ids, dtypes.int64), 1)
    row_ids = array_ops.expand_dims(math_ops.cast(all_row_ids, dtypes.int64), 1)

    if transpose_input:
      update_indices = update_col_indices
      row_shape = [
          math_ops.cast(array_ops.shape(update_row_indices)[0], dtypes.int64)
      ]
      gather_indices = update_row_indices
    else:
      update_indices = update_row_indices
      row_shape = [
          math_ops.cast(array_ops.shape(update_col_indices)[0], dtypes.int64)
      ]
      gather_indices = update_col_indices

    num_rows = math_ops.cast(array_ops.shape(update_indices)[0], dtypes.int64)
    col_shape = [num_rows]
    right = embedding_ops.embedding_lookup(
        right_factors, gather_indices, partition_strategy="div")
    new_sp_indices = array_ops.concat([row_ids, col_ids], 1)
    new_sp_shape = (array_ops.concat([row_shape, col_shape], 0) if
                    transpose_input else
                    array_ops.concat([col_shape, row_shape], 0))
    new_sp_input = sparse_tensor.SparseTensor(
        indices=new_sp_indices,
        values=sp_input.values,
        dense_shape=new_sp_shape)

    # Compute lhs and rhs of the normal equations
    total_lhs = (self._unobserved_weight * gramian)
    if self._regularization_matrix is not None:
      total_lhs += self._regularization_matrix
    if self._row_weights is None:
      # Special case of ALS. Use a much simpler update rule.
      total_rhs = (self._unobserved_weight *
                   sparse_ops.sparse_tensor_dense_matmul(
                       new_sp_input, right, adjoint_a=transpose_input))
      # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of
      # transposing explicitly.
      # TODO(rmlarsen): multi-thread tf.matrix_solve.
      new_left_values = array_ops.transpose(
          linalg_ops.matrix_solve(total_lhs, array_ops.transpose(total_rhs)))
    else:
      if row_weights is None:
        # TODO(yifanchen): Add special handling for single shard without using
        # embedding_lookup and perform benchmarks for those cases. Same for
        # col_weights lookup below.
        row_weights_slice = embedding_ops.embedding_lookup(
            row_wt, update_indices, partition_strategy="div")
      else:
        num_indices = array_ops.shape(update_indices)[0]
        with ops.control_dependencies(
            [check_ops.assert_less_equal(array_ops.rank(row_weights), 1)]):
          row_weights_slice = control_flow_ops.cond(
              math_ops.equal(array_ops.rank(row_weights), 0),
              lambda: (array_ops.ones([num_indices]) * row_weights),
              lambda: math_ops.cast(row_weights, dtypes.float32))

      col_weights = embedding_ops.embedding_lookup(
          col_wt, gather_indices, partition_strategy="div")
      partial_lhs, total_rhs = (
          gen_factorization_ops.wals_compute_partial_lhs_and_rhs(
              right,
              col_weights,
              self._unobserved_weight,
              row_weights_slice,
              new_sp_input.indices,
              new_sp_input.values,
              num_rows,
              transpose_input,
              name="wals_compute_partial_lhs_rhs"))
      total_lhs = array_ops.expand_dims(total_lhs, 0) + partial_lhs
      total_rhs = array_ops.expand_dims(total_rhs, -1)
      new_left_values = array_ops.squeeze(
          linalg_ops.matrix_solve(total_lhs, total_rhs), [2])

    update_op_name = "row_update" if update_row_factors else "col_update"
    update_op = self.scatter_update(left, update_indices, new_left_values,
                                    sharding_func, name=update_op_name)

    # Create the loss subgraph
    loss_sp_input = (sparse_ops.sparse_transpose(new_sp_input)
                     if transpose_input else new_sp_input)
    # sp_approx is the low rank estimate of the input matrix, formed by
    # computing the product <u_i, v_j> for (i, j) in loss_sp_input.indices.
    sp_approx_vals = gen_factorization_ops.masked_matmul(
        new_left_values, right, loss_sp_input.indices, transpose_a=False,
        transpose_b=True)
    sp_approx = sparse_tensor.SparseTensor(
        loss_sp_input.indices, sp_approx_vals, loss_sp_input.dense_shape)
    sp_approx_sq = math_ops.square(sp_approx)
    sp_residual = sparse_ops.sparse_add(loss_sp_input, sp_approx * (-1))
    sp_residual_sq = math_ops.square(sp_residual)
    row_wt_mat = (constant_op.constant(0.) if self._row_weights is None else
                  array_ops.expand_dims(row_weights_slice, 1))
    col_wt_mat = (constant_op.constant(0.) if self._col_weights is None else
                  array_ops.expand_dims(col_weights, 0))
    # We return the normalized loss
    partial_row_gramian = math_ops.matmul(
        new_left_values, new_left_values, transpose_a=True)
    normalization_factor = total_rows / math_ops.cast(num_rows, dtypes.float32)
    loss = (
        self._unobserved_weight * (
            sparse_ops.sparse_reduce_sum(sp_residual_sq) -
            sparse_ops.sparse_reduce_sum(sp_approx_sq) +
            math_ops.trace(math_ops.matmul(partial_row_gramian, gramian))
        ) +
        sparse_ops.sparse_reduce_sum(row_wt_mat * (sp_residual_sq * col_wt_mat))
    ) * normalization_factor
    if self._regularization is not None:
      loss += self._regularization * (
          math_ops.trace(partial_row_gramian) * normalization_factor +
          math_ops.trace(gramian)
      )
    return (new_left_values, update_op, loss)
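
Addendum (not from the original example): the reindexing idiom above, in isolation. Splitting sp_input.indices along axis=1 separates the row ids from the column ids, and tf.unique then remaps them to contiguous ids starting at 0 (the values below are illustration inputs).

import tensorflow as tf
from tensorflow.python.ops import array_ops

indices = tf.constant([[2, 7], [2, 9], [5, 7]], dtype=tf.int64)
row_ids, col_ids = array_ops.split(
    value=indices, num_or_size_splits=2, axis=1)
unique_rows, remapped_rows = tf.unique(row_ids[:, 0])
# unique_rows   -> [2, 5]
# remapped_rows -> [0, 0, 1], i.e. contiguous indices into unique_rows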
Example #56
    def _training_examples_and_variables():
      """Returns dictionaries for training examples and variables."""
      batch_size = targets.get_shape()[0]

      # Iterate over all feature columns and create appropriate lists for dense
      # and sparse features as well as dense and sparse weights (variables) for
      # SDCA.
      # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
      # dict as 1-dimensional tensors.
      dense_features, sparse_features, sparse_feature_with_values = [], [], []
      dense_feature_weights = []
      sparse_feature_weights, sparse_feature_with_values_weights = [], []
      # pylint: disable=protected-access
      for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
        transformed_tensor = features[column]
        if isinstance(column, layers.feature_column._RealValuedColumn):
          # A real-valued column corresponds to a dense feature in SDCA. A
          # transformed tensor corresponding to a RealValuedColumn has rank 2
          # (its shape is typically [batch_size, column.dimension]) and so it
          # can be passed to SDCA as is.
          dense_features.append(transformed_tensor)
          # For real valued columns, the variables list contains exactly one
          # element.
          dense_feature_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._BucketizedColumn):
          # A bucketized column corresponds to a sparse feature in SDCA. The
          # bucketized feature is "sparsified" for SDCA by converting it to a
          # SparseFeatureColumn representing the one-hot encoding of the
          # bucketized feature.
          dense_bucket_tensor = column.to_dnn_input_layer(transformed_tensor)
          sparse_feature_column = _tensor_to_sparse_feature_column(
              dense_bucket_tensor)
          sparse_feature_with_values.append(sparse_feature_column)
          # For bucketized columns, the variables list contains exactly one
          # element.
          sparse_feature_with_values_weights.append(
              columns_to_variables[column][0])
        elif isinstance(column, (layers.feature_column._CrossedColumn,
                                 layers.feature_column._SparseColumn)):
          sparse_features.append(sdca_ops.SparseFeatureColumn(
              array_ops.reshape(
                  array_ops.split(
                      transformed_tensor.indices, num_or_size_splits=2,
                      axis=1)[0], [-1]),
              array_ops.reshape(transformed_tensor.values, [-1]), None))
          sparse_feature_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._WeightedSparseColumn):
          id_tensor = column.id_tensor(transformed_tensor)
          weight_tensor = column.weight_tensor(transformed_tensor)
          sparse_feature_with_values.append(sdca_ops.SparseFeatureColumn(
              array_ops.reshape(
                  array_ops.split(
                      id_tensor.indices, num_or_size_splits=2, axis=1)[0],
                  [-1]),
              array_ops.reshape(id_tensor.values, [-1]),
              array_ops.reshape(weight_tensor.values, [-1])))
          sparse_feature_with_values_weights.append(
              columns_to_variables[column][0])
        else:
          raise ValueError('SDCAOptimizer does not support column type %s.' %
                           type(column).__name__)
      # pylint: enable=protected-access

      example_weights = array_ops.reshape(
          features[weight_column_name],
          shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
      example_ids = features[self._example_id_column]
      sparse_feature_with_values.extend(sparse_features)
      sparse_feature_with_values_weights.extend(sparse_feature_weights)
      examples = dict(sparse_features=sparse_feature_with_values,
                      dense_features=dense_features,
                      example_labels=math_ops.to_float(array_ops.reshape(
                          targets, shape=[-1])),
                      example_weights=example_weights,
                      example_ids=example_ids)
      sdca_variables = dict(
          sparse_features_weights=sparse_feature_with_values_weights,
          dense_features_weights=dense_feature_weights)
      return examples, sdca_variables
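
Addendum (not from the original example): what the split inside the SparseFeatureColumn construction extracts. Column 0 of a SparseTensor's indices holds the example (row) ids; shown here with the keyword-style split signature and illustration values.

import tensorflow as tf
from tensorflow.python.ops import array_ops

st = tf.SparseTensor(indices=[[0, 1], [0, 3], [2, 2]],
                     values=[1.0, 2.0, 3.0],
                     dense_shape=[3, 4])
example_ids = array_ops.reshape(
    array_ops.split(st.indices, num_or_size_splits=2, axis=1)[0], [-1])
# example_ids -> [0, 0, 2]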
Example #57
    def call(self, inputs, state):
        """
      Run one step of cell,
    
      Args:
        inputs: input Tensor, 2D, batch X num_units
        state: if 'state_is_tuple' is False, this must be a state Tensor, '2D, batch X state_size'. if 'state_is_tuple' is True, this must be a tuple of state Tensors, both '2D' withcolumn sizes 'c_state' and 'm_state'
    """
        num_proj = self._num_units if self._num_proj is None else self._num_proj
        sigmoid = math_ops.sigmoid

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                'Could not infer input size from inputs.get_shape()[-1]')
        if self._linear1 is None:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                self._linear1 = _Linear([inputs, m_prev], 4 * self._num_units,
                                        True)
        lstm_matrix = self._linear1([inputs, m_prev])
        # i=input_gate, j=new_input, f=forget_gate, o=output_gate
        i, j, f, o = array_ops.split(value=lstm_matrix,
                                     num_or_size_splits=4,
                                     axis=1)
        if self._ln_i is None:
            self._ln_i = Layer_Normalization([self._num_units], scope='i_norm')
        if self._ln_j is None:
            self._ln_j = Layer_Normalization([self._num_units], scope='j_norm')
        if self._ln_f is None:
            self._ln_f = Layer_Normalization([self._num_units], scope='f_norm')
        if self._ln_o is None:
            self._ln_o = Layer_Normalization([self._num_units], scope='o_norm')
        i = self._ln_i(i)
        j = self._ln_j(j)
        f = self._ln_f(f)
        o = self._ln_o(o)

        # diagonal connections
        if self._use_peepholes and not self._w_f_diag:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                with vs.variable_scope(unit_scope):
                    self._w_f_diag = vs.get_variable("w_f_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_i_diag = vs.get_variable("w_i_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_o_diag = vs.get_variable("w_o_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    if self._ln_p1 is None:
                        self._ln_p1 = Layer_Normalization([self._num_units],
                                                          scope='p1_norm')
                    if self._ln_p2 is None:
                        self._ln_p2 = Layer_Normalization([self._num_units],
                                                          scope='p2_norm')
        if self._use_peepholes:
            peep1 = self._w_f_diag * c_prev
            peep2 = self._w_i_diag * c_prev
            # The forget gate must scale the previous cell state, as in the
            # non-peephole branch below.
            c = (sigmoid(f + self._forget_bias + self._ln_p1(peep1)) * c_prev +
                 sigmoid(i + self._ln_p2(peep2)) * self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * self._activation(j))
        if self._ln_c is None:
            self._ln_c = Layer_Normalization([self._num_units], scope='c_norm')
        c = self._ln_c(c)
        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            if self._linear2 is None:
                scope = vs.get_variable_scope()
                with vs.variable_scope(scope, initializer=self._initializer):
                    with vs.variable_scope("projection") as proj_scope:
                        self._linear2 = _Linear(m, self._num_proj, False)

            m = self._linear2(m)

            if self._proj_clip is not None:
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat([c, m], 1))
        return m, new_state
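
Addendum (not from the original example): `Layer_Normalization` is used above but not defined here. A minimal sketch of such a module, normalizing each row over its features and applying a learned gain and bias; an assumption about the missing helper, not the original code.

import tensorflow as tf
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import variable_scope as vs

class Layer_Normalization(object):

    def __init__(self, shape, scope, epsilon=1e-6):
        self._epsilon = epsilon
        with vs.variable_scope(scope):
            self._gain = vs.get_variable(
                "gain", shape=shape, initializer=init_ops.ones_initializer())
            self._bias = vs.get_variable(
                "bias", shape=shape, initializer=init_ops.zeros_initializer())

    def __call__(self, x):
        # Normalize over the feature axis (per example, not per batch).
        mean, variance = tf.nn.moments(x, [1], keep_dims=True)
        normalized = (x - mean) * tf.rsqrt(variance + self._epsilon)
        return normalized * self._gain + self._bias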
Example #58
    def call(self, inputs, state):
        """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: if `state_is_tuple` is False, this must be a state Tensor,
        `2-D, batch x state_size`.  If `state_is_tuple` is True, this must be a
        tuple of state Tensors, both `2-D`, with column sizes `c_state` and
        `m_state`.

    Returns:
      A tuple containing:

      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
        LSTM after reading `inputs` when previous state was `state`.
        Here output_dim is:
           num_proj if num_proj was set,
           num_units otherwise.
      - Tensor(s) representing the new state of LSTM after reading `inputs` when
        the previous state was `state`.  Same type and shape(s) as `state`.

    Raises:
      ValueError: If input size cannot be inferred from inputs via
        static shape inference.
    """
        num_proj = self._num_units if self._num_proj is None else self._num_proj
        sigmoid = math_ops.sigmoid

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")
        if self._linear1 is None:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                if self._num_unit_shards is not None:
                    unit_scope.set_partitioner(
                        partitioned_variables.fixed_size_partitioner(
                            self._num_unit_shards))
                self._linear1 = _Linear([inputs, m_prev], 4 * self._num_units,
                                        True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        lstm_matrix = self._linear1([inputs, m_prev])
        i, j, f, o = array_ops.split(value=lstm_matrix,
                                     num_or_size_splits=4,
                                     axis=1)
        # Diagonal connections
        if self._use_peepholes and not self._w_f_diag:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                with vs.variable_scope(unit_scope):
                    self._w_f_diag = vs.get_variable("w_f_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_i_diag = vs.get_variable("w_i_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_o_diag = vs.get_variable("w_o_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)

        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) *
                 c_prev +
                 sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * self._activation(j))

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type

        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            if self._linear2 is None:
                scope = vs.get_variable_scope()
                with vs.variable_scope(scope, initializer=self._initializer):
                    with vs.variable_scope("projection") as proj_scope:
                        if self._num_proj_shards is not None:
                            proj_scope.set_partitioner(
                                partitioned_variables.fixed_size_partitioner(
                                    self._num_proj_shards))
                        self._linear2 = _Linear(m, self._num_proj, False)
            m = self._linear2(m)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat([c, m], 1))
        return m, new_state
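
Addendum (not from the original example): this `call` method mirrors tf.nn.rnn_cell.LSTMCell, so a single step can be sketched with the stock cell; with `num_proj` set, the output dimension is num_proj rather than num_units (the sizes below are illustration values).

import tensorflow as tf

cell = tf.nn.rnn_cell.LSTMCell(num_units=8, num_proj=4, use_peepholes=True)
inputs = tf.zeros([2, 5])                # batch x input_size
state = cell.zero_state(2, tf.float32)   # LSTMStateTuple(c, h)
output, new_state = cell(inputs, state)  # output shape: [2, 4] (num_proj)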
Example #59
 def loop_fn(i):
     x1 = array_ops.gather(x, i)
     return array_ops.split(x1, 2, axis=0), array_ops.split(x1,
                                                            3,
                                                            axis=-1)
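
Addendum (not from the original example): `loop_fn` above has the shape of a loop body for TensorFlow's parallel-for. A hedged usage sketch, assuming x is a 3-D tensor whose per-iteration slices divide into 2 parts on axis 0 and 3 parts on the last axis:

import tensorflow as tf
from tensorflow.python.ops.parallel_for import control_flow_ops as pfor_ops

x = tf.random_uniform([4, 2, 3])
# Runs loop_fn for i in [0, 4) and stacks the per-iteration outputs.
halves, thirds = pfor_ops.pfor(loop_fn, 4)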
 def __call__(self, inputs, state, scope=None):
     """Gated recurrent unit (GRU) with nunits cells."""
     with _checked_scope(self, scope or "gru_cell", reuse=self._reuse):
         with vs.variable_scope("gates"):  # Reset gate and update gate.
             # We start with bias of 1.0 to not reset and not update.
             value = sigmoid(
                 _linear([inputs, state], 2 * self._num_units, True, 1.0))
             r, u = array_ops.split(value=value,
                                    num_or_size_splits=2,
                                    axis=1)
              # Initializations follow the paper "Recurrent Batch Normalization".
             r_mean, r_var = tf.nn.moments(r, [1],
                                           name="r_moments",
                                           keep_dims=True)
              u_mean, u_var = tf.nn.moments(u, [1],
                                            name="u_moments",
                                            keep_dims=True)
             with vs.variable_scope("r_beta") as rn:
                 try:
                     rbeta = tf.get_variable(
                         "rbeta",
                         r.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.0))
                     rgamma = tf.get_variable(
                         "rgamma",
                         r.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.1))
                 except ValueError:
                     rn.reuse_variables()
                     rbeta = tf.get_variable(
                         "rbeta",
                         r.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.0))
                     rgamma = tf.get_variable(
                         "rgamma",
                         r.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.1))
             with vs.variable_scope("u_beta") as un:
                 try:
                     ubeta = tf.get_variable(
                         "ubeta",
                         r.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.0))
                     ugamma = tf.get_variable(
                         "ugamma",
                         r.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.1))
                 except ValueError:
                     un.reuse_variables()
                     ubeta = tf.get_variable(
                         "ubeta",
                         r.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.0))
                     ugamma = tf.get_variable(
                         "ugamma",
                         r.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.1))
             r = tf.nn.batch_normalization(r, r_mean, r_var, rbeta, rgamma,
                                           0.000001)
             u = tf.nn.batch_normalization(u, u_mean, u_var, ubeta, ugamma,
                                           0.000001)
         with vs.variable_scope("candidate"):
              # Linear combination only; batch normalization and the
              # activation are applied below.
              c = _linear([inputs, r * state], self._num_units, True)
              c_mean, c_var = tf.nn.moments(c, [1],
                                            name="c_moments",
                                            keep_dims=True)
             with vs.variable_scope("c_beta") as cn:
                 try:
                     cbeta = tf.get_variable(
                         "cbeta",
                         c.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.0))
                     cgamma = tf.get_variable(
                         "cgamma",
                         c.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.1))
                 except ValueError:
                     cn.reuse_variables()
                     cbeta = tf.get_variable(
                         "cbeta",
                         c.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.0))
                     cgamma = tf.get_variable(
                         "cgamma",
                         c.get_shape()[1],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.1))
             c = self._activation(
                 tf.nn.batch_normalization(c, c_mean, c_var, cbeta, cgamma,
                                           0.000001))
         new_h = u * state + (1 - u) * c
     return new_h, new_h
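
Addendum (not from the original example): the try/except ValueError blocks above emulate "create the variable, or reuse it if it already exists". On TF 1.4+ the same effect is usually written with reuse=tf.AUTO_REUSE (the shape below is an illustration value):

import tensorflow as tf

with tf.variable_scope("r_beta", reuse=tf.AUTO_REUSE):
    rbeta = tf.get_variable("rbeta", shape=[64], dtype=tf.float32,
                            initializer=tf.constant_initializer(0.0))
    rgamma = tf.get_variable("rgamma", shape=[64], dtype=tf.float32,
                             initializer=tf.constant_initializer(0.1))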