def testClipByValueEmptyTensor(self):
   """Runs clip_by_value on a fed tensor that contains zero elements."""
   # Regression test for GitHub issue 19337.
   empty = array_ops.placeholder(dtype=dtypes.float32, shape=None)
   # Cover each mix of tensor-valued and scalar bounds.
   clipped = [
       clip_ops.clip_by_value(empty, empty, empty),
       clip_ops.clip_by_value(empty, 1.0, 1.0),
       clip_ops.clip_by_value(empty, empty, 1.0),
       clip_ops.clip_by_value(empty, 1.0, empty),
   ]
   with self.session(use_gpu=True) as sess:
     # The test only requires that evaluation succeeds on a (7, 0) input.
     sess.run(clipped, feed_dict={empty: np.zeros((7, 0))})
 def testClipByValueBadShape(self):
   with self.session(use_gpu=True):
     x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3, 1])
     # Use a nonsensical shape.
     clip = constant_op.constant([1.0, 2.0])
     with self.assertRaises(ValueError):
       _ = clip_ops.clip_by_value(x, -clip, clip)
     with self.assertRaises(ValueError):
       _ = clip_ops.clip_by_value(x, 1.0, clip)
   def _get_coordinatewise_learning_rate(self, grad, var):
     """Builds the learning-rate tensor used to scale `grad` for `var`.

     Updates the 'first_moment' and 'second_moment' slots of `var` from `grad`,
     clips the second-moment estimate to [1e-12, 1e12], scales it by the batch
     size and returns `2 * batch_size / (total_num_examples * preconditioner)`.
     When `self._use_single_learning_rate` is set, the preconditioner is first
     reduced to its mean.

     Args:
       grad: Gradient for `var`; an `ops.Tensor` or an `ops.IndexedSlices`.
       var: Variable whose moment slots are read and updated.

     Returns:
       The learning-rate tensor. For `IndexedSlices` input the preconditioner
       is gathered at `grad.indices` before the rate is computed.

     Raises:
       errors.InvalidArgumentError: If `grad` is neither an `ops.Tensor` nor an
         `ops.IndexedSlices`.
     """
     # Compute the learning rate using a moving average for the diagonal of BB^T
     avg_first = self.get_slot(var, 'first_moment')
     avg_second = self.get_slot(var, 'second_moment')
     decay_tensor = math_ops.cast(self._decay_tensor, var.dtype)
     batch_size = math_ops.cast(self._batch_size_tensor, var.dtype)

     # Create an estimator for the moving average of gradient mean and variance
     # via Welford's algorithm
     if isinstance(grad, ops.Tensor):
       delta = grad - avg_first
       # While the counter is below 1 the full delta is added; afterwards the
       # delta is weighted by (1 - decay).
       first_moment_update = avg_first.assign_add(
           array_ops.where(self._counter < 1, math_ops.cast(1, var.dtype),
                           1. - decay_tensor) * delta)

       # The second-moment update is sequenced after the first-moment update.
       # NOTE(review): the leading factor cast(counter < 1) is nonzero only
       # while the counter is below 1 -- confirm this is the intended gating.
       with ops.control_dependencies([first_moment_update]):
         second_moment_update = avg_second.assign_add(
             math_ops.cast(self._counter < 1, var.dtype) *
             -(1. - decay_tensor) * (
                 avg_second - decay_tensor  * math_ops.square(delta)))
       # Reading the clipped second moment is tied to the update op above.
       diag_preconditioner = control_flow_ops.with_dependencies(
           [second_moment_update],
           clip_ops.clip_by_value(avg_second, 1e-12, 1e12))
     elif isinstance(grad, ops.IndexedSlices):
       # Sparse case: only the entries addressed by grad.indices are touched.
       delta = grad.values - array_ops.gather_nd(avg_first, grad.indices)
       first_moment_update = state_ops.scatter_add(
           avg_first,
           grad.indices,
           array_ops.where(self._counter < 1,
                           math_ops.cast(1., var.dtype),
                           1. - decay_tensor) * delta)

       with ops.control_dependencies([first_moment_update]):
         # `avg_second` is rebound to the scatter_add result, then to the
         # values gathered at grad.indices.
         avg_second = state_ops.scatter_add(
             avg_second,
             grad.indices,
             math_ops.cast(self._counter < 1, var.dtype) *
             -(1. - decay_tensor) * (
                 array_ops.gather_nd(avg_second, grad.indices) - decay_tensor *
                 math_ops.square(delta)))
         avg_second = array_ops.gather_nd(avg_second, grad.indices)
         # TODO(b/70783772)
         diag_preconditioner = clip_ops.clip_by_value(avg_second, 1e-12, 1e12)
     else:
       raise errors.InvalidArgumentError(
           None, None, 'grad must of type Tensor or IndexedSlice')

     diag_preconditioner *= batch_size

     # Optionally collapse the per-coordinate preconditioner to its mean.
     if self._use_single_learning_rate:
       diag_preconditioner = math_ops.reduce_mean(diag_preconditioner)

     # From Theorem 2 Corollary 1 of Mandt et al. 2017
     return 2. * batch_size / (
         math_ops.cast(self._total_num_examples, var.dtype.base_dtype) *
         diag_preconditioner)
Beispiel #4
0
 def testClipByValueBadShape(self):
   """Evaluating clip_by_value with badly shaped bounds must fail."""
   with self.test_session(use_gpu=True):
     data = constant_op.constant(
         [-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3, 1])
     # Bounds with a shape the op is expected to reject for this input.
     bad_bound = constant_op.constant([1.0, 2.0])
     # Both bounds badly shaped.
     with self.assertRaises(errors_impl.InvalidArgumentError):
       clip_ops.clip_by_value(data, -bad_bound, bad_bound).eval()
     # Only the upper bound badly shaped.
     with self.assertRaises(errors_impl.InvalidArgumentError):
       clip_ops.clip_by_value(data, 1.0, bad_bound).eval()
Beispiel #5
0
  def testClipByValueGradient(self):
    """Checks clip_by_value gradients for scalar and tensor bounds."""
    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32)
    scalar_clipped = clip_ops.clip_by_value(inputs, 0.5, 3.5)
    lower = constant_op.constant([0.5, 0.5, 0.5, 0.5], dtype=dtypes.float32)
    upper = constant_op.constant([3.5, 3.5, 3.5, 3.5], dtype=dtypes.float32)
    tensor_clipped = clip_ops.clip_by_value(inputs, lower, upper)
    with self.test_session():
      # The same tolerance applies to both ways of passing the bounds.
      for clipped in (scalar_clipped, tensor_clipped):
        error = gradient_checker.compute_gradient_error(
            inputs, [4], clipped, [4])
        self.assertLess(error, 1e-4)
def _do_maximum_mean(samples, envelope, high, name=None):
  """Shared implementation behind maximum_mean and minimum_mean."""
  with ops.name_scope(name, "do_maximum_mean", [samples, envelope, high]):
    rank = array_ops.rank(samples)
    # `_batch_sort_vector` wants the leading dimension of `samples` in the
    # rightmost position, so rotate axis 0 to the end first.
    rotation = array_ops.concat([math_ops.range(1, rank), [0]], axis=0)
    sorted_samples = _batch_sort_vector(
        array_ops.transpose(samples, rotation))

    # The maximum mean moves `envelope`-worth of probability mass from the
    # smallest samples onto the maximum value `high`:
    # - the smallest k samples, where `k/n < envelope`, are dropped,
    # - the sample at index k keeps a `1/n - (envelope - k/n)` share,
    # - every later sample keeps its full `1/n` share,
    # - and `envelope * high` is added at the end.
    # `weights` below is the vectorized, batched mask encoding those shares.
    float_dtype = sorted_samples.dtype.base_dtype
    num_samples = math_ops.cast(
        array_ops.shape(sorted_samples)[-1], dtype=float_dtype)
    step = 1. / num_samples
    cum_steps = step * math_ops.range(1, num_samples + 1, dtype=float_dtype)
    weights = clip_ops.clip_by_value(
        cum_steps - envelope[..., array_ops.newaxis],
        clip_value_min=0.,
        clip_value_max=step)
    weighted_total = math_ops.reduce_sum(sorted_samples * weights, axis=-1)
    return weighted_total + envelope * high
Beispiel #7
0
def adjust_brightness(image, delta):
  """Adjust the brightness of RGB or Grayscale images.

  Convenience wrapper: the image is converted to a float representation,
  `delta` is added to every component, the result is clipped to `[0, 1]`, and
  the image is converted back to its original data type. When chaining several
  adjustments, consider minimizing the number of redundant conversions.

  Both `image` and `delta` are converted to `float` before adding (and `image`
  is scaled appropriately if it is in fixed-point representation). For regular
  images, `delta` should be in the range `[0,1)`, as it is added to the image
  in floating point representation, where pixel values are in the `[0,1)`
  range.

  Args:
    image: A tensor.
    delta: A scalar. Amount to add to the pixel values.

  Returns:
    A brightness-adjusted tensor of the same shape and type as `image`.
  """
  with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Keep the incoming dtype so the result can be converted back to it.
    original_dtype = image.dtype
    float_image = convert_image_dtype(image, dtypes.float32)
    float_delta = math_ops.cast(delta, dtypes.float32)

    brightened = clip_ops.clip_by_value(
        math_ops.add(float_image, float_delta, name=name), 0.0, 1.0)

    return convert_image_dtype(brightened, original_dtype, saturate=True)
Beispiel #8
0
def saturate_cast(image, dtype):
  """Casts image data to `dtype`, clamping first when the cast could overflow.

  No scaling is applied. If the source dtype can represent values below
  `dtype.min` and/or above `dtype.max`, the data is clamped on the affected
  side(s) before the cast.

  Args:
    image: An image to cast to a different data type.
    dtype: A `DType` to cast `image` to.

  Returns:
    `image`, safely cast to `dtype`.
  """
  # A side needs clamping when the source range extends past the target range
  # on that side; this also covers casts to unsigned types.
  needs_lower_clamp = image.dtype.min < dtype.min
  needs_upper_clamp = image.dtype.max > dtype.max

  if needs_lower_clamp and needs_upper_clamp:
    lower = math_ops.cast(dtype.min, image.dtype)
    upper = math_ops.cast(dtype.max, image.dtype)
    clamped = clip_ops.clip_by_value(image, lower, upper)
  elif needs_lower_clamp:
    clamped = math_ops.maximum(image, math_ops.cast(dtype.min, image.dtype))
  elif needs_upper_clamp:
    clamped = math_ops.minimum(image, math_ops.cast(dtype.max, image.dtype))
  else:
    clamped = image

  return math_ops.cast(clamped, dtype)
Beispiel #9
0
  def get_gradients(self, loss, params):
    """Computes the gradients of `loss` for every variable in `params`.

    Arguments:
      loss: Loss tensor.
      params: Variables to differentiate with respect to; flattened with
        `nest.flatten` before use.

    Returns:
      List of gradient tensors, clipped by norm and/or by value when the
      optimizer has a `clipnorm` / `clipvalue` attribute.

    Raises:
      ValueError: If the gradient for any variable is `None` (e.g. because a
        gradient function is not implemented).
    """
    flat_params = nest.flatten(params)
    with backend.get_graph().as_default():
      grads = gradients.gradients(loss, flat_params)

    # Fail on the first variable that has no gradient.
    for grad, param in zip(grads, flat_params):
      if grad is not None:
        continue
      raise ValueError("Variable {} has `None` for gradient. "
                       "Please make sure that all of your ops have a "
                       "gradient defined (i.e. are differentiable). "
                       "Common ops without gradient: "
                       "K.argmax, K.round, K.eval.".format(param))

    # Norm clipping, when configured, runs before value clipping.
    if hasattr(self, "clipnorm"):
      grads = [clip_ops.clip_by_norm(g, self.clipnorm) for g in grads]
    if hasattr(self, "clipvalue"):
      grads = [clip_ops.clip_by_value(g, -self.clipvalue, self.clipvalue)
               for g in grads]
    return grads
  def get_gradients(self, loss, params):
    """Returns gradients of the scaled `loss` with respect to `params`.

    The loss is passed through `self._scale_loss` before differentiation.

    Arguments:
      loss: Loss tensor.
      params: List of variables.

    Returns:
      List of gradient tensors, clipped by norm and/or by value when the
      optimizer has a `clipnorm` / `clipvalue` attribute.

    Raises:
      ValueError: In case any gradient cannot be computed (e.g. if gradient
        function not implemented).
    """
    loss = self._scale_loss(loss)
    grads = gradients.gradients(loss, params)
    # Use an identity check: `None in grads` falls back to `==` between each
    # gradient object and None, which depends on how that object defines
    # equality.
    if any(g is None for g in grads):
      raise ValueError("An operation has `None` for gradient. "
                       "Please make sure that all of your ops have a "
                       "gradient defined (i.e. are differentiable). "
                       "Common ops without gradient: "
                       "K.argmax, K.round, K.eval.")
    # Norm clipping, when configured, runs before value clipping.
    if hasattr(self, "clipnorm"):
      grads = [clip_ops.clip_by_norm(g, self.clipnorm) for g in grads]
    if hasattr(self, "clipvalue"):
      grads = [
          clip_ops.clip_by_value(g, -self.clipvalue, self.clipvalue)
          for g in grads
      ]
    return grads
Beispiel #11
0
  def testClipByValueNonFinite(self):
    """NaN stays NaN; +/-Inf are clamped to the clip bounds."""
    bound = 4.0
    expected = [float('NaN'), bound, -bound]
    with self.test_session(use_gpu=True):
      x = constant_op.constant([float('NaN'), float('Inf'), -float('Inf')])
      actual = clip_ops.clip_by_value(x, -bound, bound).eval()

    self.assertAllClose(expected, actual)
Beispiel #12
0
  def testClipByValue(self):
    """Values outside [-4.4, 4.4] are clamped; the rest pass through."""
    limit = 4.4
    expected = [[-limit, 2.0, 3.0], [4.0, limit, limit]]
    with self.test_session(use_gpu=True):
      x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3])
      actual = clip_ops.clip_by_value(x, -limit, limit).eval()

    self.assertAllClose(expected, actual)
 def LSTMCell(cls, x, mprev, cprev, weights):
   """Computes one LSTM step and returns `(new_m, new_c)`.

   `x` and `mprev` are concatenated along axis 1 and multiplied by `weights`;
   the product is split into four equal parts along axis 1. The new cell state
   is clipped to [-50, 50] before the output is formed.
   """
   gate_inputs = math_ops.matmul(array_ops.concat([x, mprev], 1), weights)
   i_i, i_g, f_g, o_g = array_ops.split(
       value=gate_inputs, num_or_size_splits=4, axis=1)
   retained = math_ops.sigmoid(f_g) * cprev
   candidate = math_ops.sigmoid(i_g) * math_ops.tanh(i_i)
   new_c = clip_ops.clip_by_value(retained + candidate, -50.0, 50.0)
   new_m = math_ops.sigmoid(o_g) * math_ops.tanh(new_c)
   return new_m, new_c
Beispiel #14
0
def compute_cdf(values, value_range, **kwargs):
  """Computes the normalized cumulative distribution of `values`.

  The values are assigned to a fixed number of bins (the module-level
  `_NBINS`) and the cumulative counts are accumulated with a `while_loop`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`
    **kwargs: keyword arguments; only `name` is read.

  Returns:
    A 1-D `Tensor` holding the cumulative counts divided by their maximum.

  """
  num_bins = _NBINS
  scope_name = kwargs.get('name', None)
  with ops.name_scope(scope_name, 'cdf', [values, value_range, num_bins]):
    values = ops.convert_to_tensor(values, name='values')
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    num_bins_float = np.float32(num_bins)

    # Rescale so that values inside value_range land in [0, 1].
    scaled_values = math_ops.truediv(
        values - value_range[0],
        value_range[1] - value_range[0],
        name='scaled_values')

    # Values strictly inside value_range map to {0, ..., num_bins - 1};
    # anything outside maps to <= 0 or >= num_bins.
    bin_ids = math_ops.floor(num_bins_float * scaled_values, name='indices')

    # Pull edge cases (e.g. value == value_range[1]) and outliers into range.
    bin_ids = math_ops.cast(
        clip_ops.clip_by_value(bin_ids, 0, num_bins_float - 1), dtypes.int32)

    def keep_going(bin_index, _):
      return math_ops.less(bin_index, num_bins)

    def accumulate(bin_index, running_cdf):
      # Number of values whose bin id is at or below the current bin.
      count_at_or_below = math_ops.reduce_sum(
          math_ops.cast(
              math_ops.less_equal(bin_ids, bin_index), dtypes.float32))
      updated_cdf = math_ops.add(
          running_cdf,
          array_ops.one_hot(
              bin_index,
              depth=_NBINS,
              on_value=count_at_or_below,
              off_value=0.0))
      return [bin_index + 1, updated_cdf]

    initial_cdf = array_ops.zeros(num_bins)
    first_bin = constant_op.constant(0)

    _, cdf = control_flow_ops.while_loop(
        keep_going,
        accumulate, [first_bin, initial_cdf],
        maximum_iterations=num_bins)

    return math_ops.div(cdf, math_ops.reduce_max(cdf))
  def testClipByValueNonFinite(self):
    """NaN stays NaN; +/-Inf are clamped to the clip bounds."""
    bound = 4.0
    expected = [float('NaN'), bound, -bound]
    # TODO(b/78016351): Enable test on GPU once the bug is fixed.
    with self.cached_session():
      x = constant_op.constant([float('NaN'), float('Inf'), -float('Inf')])
      actual = self.evaluate(clip_ops.clip_by_value(x, -bound, bound))

    self.assertAllClose(expected, actual)
Beispiel #16
0
    def __call__(self, inputs, state, scope=None):
        """Runs one step of the cell.

        Args:
            inputs: Input tensor of rank 2 whose second dimension is statically
                known.
            state: Previous state. A `(c, m)` pair when `self._state_is_tuple`
                is set; otherwise one tensor with `c` in the first
                `num_units` columns followed by `m`.
            scope: Optional variable scope; defaults to the class name.

        Returns:
            A pair `(m, new_state)` where `new_state` has the same layout as
            `state`.

        Raises:
            ValueError: If the input size cannot be inferred statically.
        """
        proj_size = self._num_proj
        if proj_size is None:
            proj_size = self._num_units

        if self._state_is_tuple:
            c_prev, m_prev = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(
                state, [0, self._num_units], [-1, proj_size])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

        with vs.variable_scope(scope or type(self).__name__,
                               initializer=self._initializer):
            gate_weights = tf.nn.rnn_cell._get_concat_variable(
                "W", [input_size.value + proj_size, 3 * self._num_units],
                dtype, self._num_unit_shards)
            gate_bias = vs.get_variable(
                "B", shape=[3 * self._num_units],
                initializer=init_ops.zeros_initializer, dtype=dtype)

            stacked_inputs = array_ops.concat(1, [inputs, m_prev])
            pre_activations = nn_ops.bias_add(
                math_ops.matmul(stacked_inputs, gate_weights), gate_bias)
            # Three equal slices along axis 1: i, j and o.
            i, j, o = array_ops.split(1, 3, pre_activations)

            c = c_prev + sigmoid(i) * self._activation(j)
            if self._cell_clip is not None:
                c = clip_ops.clip_by_value(
                    c, -self._cell_clip, self._cell_clip)

            m = sigmoid(o) * self._activation(c)
            if self._num_proj is not None:
                projection = tf.nn.rnn_cell._get_concat_variable(
                    "W_P", [self._num_units, self._num_proj],
                    dtype, self._num_proj_shards)
                m = math_ops.matmul(m, projection)
                if self._proj_clip is not None:
                    m = clip_ops.clip_by_value(
                        m, -self._proj_clip, self._proj_clip)

        if self._state_is_tuple:
            new_state = tf.nn.rnn_cell.LSTMStateTuple(c, m)
        else:
            new_state = array_ops.concat(1, [c, m])
        return m, new_state
  def testConstraint(self):
    """Variable constraints are applied after an SGD step."""
    clip_neg_tenth_to_zero = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
    clip_zero_to_one = lambda x: clip_ops.clip_by_value(x, 0., 1.)
    with self.cached_session():
      var0 = variables.Variable(
          [1.0, 2.0], constraint=clip_neg_tenth_to_zero)
      var1 = variables.Variable(
          [3.0, 4.0], constraint=clip_zero_to_one)

      def loss():
        return 5 * var0 + 3 * var1

      sgd = gradient_descent.SGD(3.0)

      self.evaluate(variables.global_variables_initializer())
      # The variables start at their unconstrained initial values.
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
      # One optimizer step.
      opt_op = sgd.minimize(loss, var_list=[var0, var1])
      self.evaluate(variables.global_variables_initializer())
      self.evaluate(opt_op)
      # After the step each variable sits on a bound of its constraint.
      self.assertAllClose([-0.1, -0.1], self.evaluate(var0))
      self.assertAllClose([0., 0.], self.evaluate(var1))
Beispiel #18
0
  def testClipByValue0Type(self):
    """Scalar bounds across float, signed and unsigned integer dtypes."""
    dtypes_under_test = [
        dtypes.float16, dtypes.float32, dtypes.float64,
        dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
        dtypes.uint8, dtypes.uint16,
    ]
    expected = [[2, 2, 3], [4, 4, 4]]
    for dtype in dtypes_under_test:
      with self.test_session(use_gpu=True):
        x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
        actual = clip_ops.clip_by_value(x, 2, 4).eval()

      self.assertAllClose(expected, actual)
  def _apply_sparse(self, grad, var):
    """Applies a sparse gradient to `var` with a clipped learning rate.

    The learning rate from `_get_coordinatewise_learning_rate` is clipped to
    `[0, max]`, where `max` is the burn-in maximum while the counter is below
    `self._burnin` and the regular maximum afterwards. The scaled gradient
    values are then subtracted from `var` at `grad.indices`.
    """
    in_burnin = self._counter < self._burnin
    rate_cap = array_ops.where(in_burnin,
                               self._burnin_max_learning_rate,
                               self._max_learning_rate)

    raw_rate = self._get_coordinatewise_learning_rate(grad, var)
    learn_rate = clip_ops.clip_by_value(
        raw_rate, 0.0, math_ops.cast(rate_cap, var.dtype))
    scaled_values = grad.values * learn_rate

    return state_ops.scatter_sub(
        var, grad.indices, scaled_values, use_locking=self._use_locking)
Beispiel #20
0
  def _compute_gradients(self, loss, var_list, grad_loss=None):
    """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    When the optimizer has a `clipnorm` and/or `clipvalue` attribute, every
    gradient that is not `None` is clipped by norm and then by value; `None`
    gradients are passed through unchanged.

    Args:
      loss: A callable taking no arguments which returns the value to minimize.
      var_list: list or tuple of `Variable` objects to update to minimize
        `loss`, or a callable returning the list or tuple of `Variable` objects.
        Use callable when the variable list would otherwise be incomplete before
        `minimize` and the variables are created at the first time when `loss`
        is called.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid, or var_list is None.
    """
    # TODO(josh11b): Test that we handle weight decay in a reasonable way.
    with backprop.GradientTape() as tape:
      # A callable var_list is resolved only after the loss has been called,
      # so there is nothing to watch up front in that case.
      if not callable(var_list):
        tape.watch(var_list)
      loss_value = loss()
    if callable(var_list):
      var_list = var_list()
    var_list = nest.flatten(var_list)
    grads = tape.gradient(loss_value, var_list, grad_loss)

    # Gradients may be None (see docstring); clipping ops cannot take None,
    # so those entries are left untouched.
    if hasattr(self, "clipnorm"):
      grads = [
          None if g is None else clip_ops.clip_by_norm(g, self.clipnorm)
          for g in grads
      ]
    if hasattr(self, "clipvalue"):
      grads = [
          None if g is None else clip_ops.clip_by_value(
              g, -self.clipvalue, self.clipvalue)
          for g in grads
      ]

    grads_and_vars = list(zip(grads, var_list))
    # Only variables that received a gradient and are not resource-typed are
    # dtype-checked.
    self._assert_valid_dtypes([
        v for g, v in grads_and_vars
        if g is not None and v.dtype != dtypes.resource
    ])

    return grads_and_vars
Beispiel #21
0
def adjust_saturation(image, saturation_factor, name=None):
  """Adjust saturation of an RGB image.

  Convenience wrapper: the image is converted to a float representation and
  then to HSV, the saturation (S) channel is multiplied by
  `saturation_factor` and clipped to `[0, 1]`, and the result is converted
  back to RGB and to the original data type. When chaining several
  adjustments, consider minimizing the number of redundant conversions.

  If the environment variable `TF_ADJUST_SATURATION_FUSED` is set to `true`,
  `t` or `1` (case-insensitive), the fused `gen_image_ops.adjust_saturation`
  op is used instead of the HSV round trip.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    saturation_factor: float. Factor to multiply the saturation by.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_saturation', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Keep the incoming dtype so the result can be converted back to it.
    original_dtype = image.dtype
    float_image = convert_image_dtype(image, dtypes.float32)

    # TODO(zhengxq): we will switch to the fused version after we add a GPU
    # kernel for that.
    fused_flag = os.environ.get('TF_ADJUST_SATURATION_FUSED', '')
    use_fused = fused_flag.lower() in ('true', 't', '1')

    if use_fused:
      fused_result = gen_image_ops.adjust_saturation(
          float_image, saturation_factor)
      return convert_image_dtype(fused_result, original_dtype)

    hsv = gen_image_ops.rgb_to_hsv(float_image)

    # Split HSV into its three single-channel planes along the last axis.
    hue, saturation, value = [
        array_ops.slice(hsv, [0, 0, channel], [-1, -1, 1])
        for channel in range(3)
    ]

    saturation = clip_ops.clip_by_value(
        saturation * saturation_factor, 0.0, 1.0)

    hsv_altered = array_ops.concat([hue, saturation, value], 2)
    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    return convert_image_dtype(rgb_altered, original_dtype)
  def testConstraint(self):
    """Variable constraints are applied after a gradient-descent step."""
    clip_neg_tenth_to_zero = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
    clip_zero_to_one = lambda x: clip_ops.clip_by_value(x, 0., 1.)
    with self.cached_session():
      var0 = variables.Variable(
          [1.0, 2.0], constraint=clip_neg_tenth_to_zero)
      var1 = variables.Variable(
          [3.0, 4.0], constraint=clip_zero_to_one)
      cost = 5 * var0 + 3 * var1
      global_step = variables.Variable(
          array_ops.zeros([], dtypes.int64), name='global_step')
      optimizer = gradient_descent.GradientDescentOptimizer(3.0)
      opt_op = optimizer.minimize(cost, global_step, [var0, var1])

      variables.global_variables_initializer().run()
      # The variables start at their unconstrained initial values.
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())
      # One optimizer step.
      opt_op.run()
      # After the step each variable sits on a bound of its constraint.
      self.assertAllClose([-0.1, -0.1], var0.eval())
      self.assertAllClose([0., 0.], var1.eval())
  def _testClipIndexedSlicesByValue(self, values, indices, shape,
                                    clip_value_min, clip_value_max, expected):
    """Clips an IndexedSlices built from the arguments and checks the result.

    Asserts that the clipped result is still an `IndexedSlices` and that its
    values are close to `expected`.
    """
    with self.session(use_gpu=True):
      sparse_input = ops.IndexedSlices(
          constant_op.constant(values),
          constant_op.constant(indices),
          constant_op.constant(shape))
      clipped = clip_ops.clip_by_value(
          sparse_input, clip_value_min, clip_value_max)
      # Clipping must preserve the IndexedSlices representation.
      self.assertIsInstance(clipped, ops.IndexedSlices)

    self.assertAllClose(clipped.values, expected)
  def DISABLED_testClipByValue2Type(self):
    """Scalar lower bound with a tensor upper bound, across numeric dtypes."""
    dtypes_under_test = [
        dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int8,
        dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16
    ]
    expected = [[4, 4, 4], [4, 5, 6]]
    for dtype in dtypes_under_test:
      with self.cached_session(use_gpu=True):
        x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
        upper = constant_op.constant(
            [6, 6, 6, 6, 6, 6], shape=[2, 3], dtype=dtype)
        actual = self.evaluate(clip_ops.clip_by_value(x, 4, upper))

      self.assertAllClose(expected, actual)
  def _apply_dense(self, grad, var):
    """Applies a dense gradient to `var` with a clipped learning rate.

    The learning rate from `_get_coordinatewise_learning_rate` is clipped to
    `[0, max]`, where `max` is the burn-in maximum while the counter is below
    `self._burnin` and the regular maximum afterwards. The gradient is
    pre-multiplied by that rate and applied with a step size of 1.
    """
    in_burnin = self._counter < self._burnin
    rate_cap = array_ops.where(in_burnin,
                               self._burnin_max_learning_rate,
                               self._max_learning_rate)

    raw_rates = self._get_coordinatewise_learning_rate(grad, var)
    learn_rates = clip_ops.clip_by_value(
        raw_rates, 0.0, math_ops.cast(rate_cap, var.dtype.base_dtype))

    scaled_grad = grad * learn_rates
    # The rate is already folded into the gradient, hence the unit step.
    update = training_ops.apply_gradient_descent(
        var,
        math_ops.cast(1.0, var.dtype),
        scaled_grad,
        use_locking=self._use_locking)
    return update.op
Beispiel #26
0
def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None):
  """Return histogram of values.

  Flattens `values` and counts how many entries fall into each of `nbins`
  equal-width bins spanning `value_range`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
      values <= value_range[0] will be mapped to hist[0],
      values >= value_range[1] will be mapped to hist[-1].
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram').

  Returns:
    A 1-D `Tensor` holding histogram of values.

  """
  with ops.name_scope(name, 'histogram', [values, value_range, nbins]) as scope:
    flat_values = gen_array_ops.reshape(
        ops.convert_to_tensor(values, name='values'), [-1])
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    nbins = ops.convert_to_tensor(nbins, dtype=np.int32, name='nbins')
    nbins_float = math_ops.cast(nbins, flat_values.dtype)

    # Rescale so that values inside value_range land in [0, 1].
    scaled_values = math_ops.truediv(
        flat_values - value_range[0],
        value_range[1] - value_range[0],
        name='scaled_values')

    # Values strictly inside value_range map to {0, ..., nbins - 1};
    # anything outside maps to <= 0 or >= nbins.
    raw_bins = math_ops.floor(nbins_float * scaled_values, name='indices')

    # Pull edge cases (e.g. value == value_range[1]) and outliers into range.
    bin_ids = math_ops.cast(
        clip_ops.clip_by_value(raw_bins, 0, nbins_float - 1), np.int32)

    # Summing a one per entry, segmented by bin id, yields the counts.
    ones = array_ops.ones_like(bin_ids, dtype=dtype)
    return math_ops.unsorted_segment_sum(ones, bin_ids, nbins, name=scope)
Beispiel #27
0
def adjust_saturation(image, saturation_factor, name=None):
    """Scales the saturation of RGB images via an HSV round trip.

    The image is converted to float32 and then to HSV, the saturation channel
    is multiplied by `saturation_factor` and clipped to `[0, 1]`, and the
    result is converted back to RGB and to the original dtype. The slicing
    uses four-element begin/size vectors, so `image` is treated as 4-D with
    the channels on the last axis.

    Args:
        image: RGB image tensor (must expose `.dtype`).
        saturation_factor: Factor to multiply the saturation by.
        name: A name for this operation (optional).

    Returns:
        The adjusted images, converted back to the dtype of `image`.
    """
    with ops.op_scope([image], name, 'adjust_saturation') as name:
        # Keep the incoming dtype so the result can be converted back to it.
        original_dtype = image.dtype
        float_image = tf.image.convert_image_dtype(image, tf.float32)

        hsv = gen_image_ops.rgb_to_hsv(float_image)

        # Split HSV into its three single-channel planes along the last axis.
        hue, saturation, value = [
            tf.slice(hsv, [0, 0, 0, channel], [-1, -1, -1, 1])
            for channel in range(3)
        ]

        saturation = clip_ops.clip_by_value(
            saturation * saturation_factor, 0.0, 1.0)

        hsv_altered = tf.concat(3, [hue, saturation, value])
        rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

        return tf.image.convert_image_dtype(rgb_altered, original_dtype)
Beispiel #28
0
  def build(self, inputs_shape):
    """Creates the input kernel, recurrent kernel and bias variables.

    The recurrent kernel is optionally wrapped so its absolute values are at
    least `self._recurrent_min_abs` and at most `self._recurrent_max_abs`.

    Args:
      inputs_shape: Shape of the inputs; dimension 1 must be known.

    Raises:
      ValueError: If dimension 1 of `inputs_shape` is unknown.
    """
    if inputs_shape[1].value is None:
      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                       % inputs_shape)
    input_depth = inputs_shape[1].value

    # Input-to-hidden weights; small random normal values by default.
    if self._input_initializer is None:
      self._input_initializer = init_ops.random_normal_initializer(
          mean=0.0, stddev=0.001)
    self._input_kernel = self.add_variable(
        "input_kernel",
        shape=[input_depth, self._num_units],
        initializer=self._input_initializer)

    # One recurrent weight per unit; initialized to 1 by default.
    if self._recurrent_initializer is None:
      self._recurrent_initializer = init_ops.constant_initializer(1.)
    self._recurrent_kernel = self.add_variable(
        "recurrent_kernel",
        shape=[self._num_units],
        initializer=self._recurrent_initializer)

    # Enforce the minimum magnitude while keeping each weight's sign.
    if self._recurrent_min_abs:
      magnitude = math_ops.maximum(
          math_ops.abs(self._recurrent_kernel), self._recurrent_min_abs)
      self._recurrent_kernel = math_ops.multiply(
          math_ops.sign(self._recurrent_kernel), magnitude)

    # Enforce the maximum magnitude with a symmetric clip.
    if self._recurrent_max_abs:
      max_abs = self._recurrent_max_abs
      self._recurrent_kernel = clip_ops.clip_by_value(
          self._recurrent_kernel, -max_abs, max_abs)

    self._bias = self.add_variable(
        "bias",
        shape=[self._num_units],
        initializer=init_ops.zeros_initializer(dtype=self.dtype))

    self.built = True
def safe_cumprod(x, *args, **kwargs):
  """Computes cumprod of x in logspace using cumsum to avoid underflow.

  cumprod and its gradient can be numerically unstable when the argument has
  very small and/or zero values.  For an all-positive argument the cumulative
  product equals exp(cumsum(log(x))), which is what this function computes
  after clipping `x` to `[tiny, 1]`, where `tiny` is the `finfo` tiny value of
  the dtype of `x`.  It can be called identically to tf.cumprod.

  Args:
    x: Tensor to take the cumulative product of.
    *args: Passed on to cumsum; these are identical to those in cumprod.
    **kwargs: Passed on to cumsum; these are identical to those in cumprod.
  Returns:
    Cumulative product of x.
  """
  with ops.name_scope(None, "SafeCumprod", [x]):
    x = ops.convert_to_tensor(x, name="x")
    tiny = np.finfo(x.dtype.as_numpy_dtype).tiny
    # Clipping at `tiny` keeps the logarithm finite.
    clipped = clip_ops.clip_by_value(x, tiny, 1)
    log_cumsum = math_ops.cumsum(math_ops.log(clipped), *args, **kwargs)
    return math_ops.exp(log_cumsum)
Beispiel #30
0
def log_cosh(x, name="log_cosh"):
  """Overflow-safe `log(cosh(x))`, i.e. `Log[(e**x + e**-x) / 2]`.

  Args:
    x:  Numeric `Tensor`.
    name:  A string name to prepend to created Ops.

  Returns:
    Numeric `Tensor` of same `shape` and `dtype` as `x`.
  """
  # e**x overflows to Inf once |x| is large.  In that regime
  # Log[e**x + e**-x] is approximately |x|, so log_cosh(x) ~ |x| - Log[2].
  # The exponential itself is only ever evaluated on a clipped copy of x.
  with ops.name_scope(name):
    x = ops.convert_to_tensor(x, name="x")
    np_dtype = x.dtype.as_numpy_dtype
    # Switch-over point: 90% of the log of the largest finite dtype value.
    large_x_value = 0.9 * np.log(np.finfo(np_dtype).max)
    x_capped = clip_ops.clip_by_value(x, -large_x_value, large_x_value)
    use_asymptote = math_ops.abs(x) > large_x_value
    asymptote = math_ops.abs(x) - np.log(2).astype(np_dtype)
    exact = math_ops.log(cosh(x_capped))
    return array_ops.where(use_asymptote, asymptote, exact)
Beispiel #31
0
    def call(self, inputs, state):
        """Run one step of the hyper-network-modulated, layer-normalised LSTM.

        A small auxiliary ("hyper") LSTM reads the current input concatenated
        with the previous hidden output.  Its output `h_out` is handed to
        `_hyper_norm` / `_hyper_bias` to adjust each gate pre-activation of
        the main cell.

        Args:
            inputs: 2-D input tensor, `[batch, input_size]`.  `input_size`
                must be statically known.
            state: pair `(c_t, m_t)` of 2-D tensors.  The first
                `self._num_units` columns of each are the main cell's `c`
                and `m` state; the remaining columns are the hyper cell's
                `c` and `h` state.

        Returns:
            A pair `(m, new_state)`.  `m` is the new hidden output (after the
            optional projection and clipping).  `new_state` is an
            `LSTMStateTuple` laid out like `state`, i.e. the main state
            concatenated with the new hyper-cell state along axis 1.

        Raises:
            ValueError: if the input size cannot be inferred from the static
                shape of `inputs`.
        """
        sigmoid = math_ops.sigmoid

        # Split the packed state into the main cell part and the hyper part.
        c_t, m_t = state
        c_prev, m_prev = c_t[:, 0:self._num_units], m_t[:, 0:self._num_units]
        hyper_state = LSTMStateTuple(c_t[:, self._num_units:],
                                     m_t[:, self._num_units:])

        w_init = None

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]

        if input_size.value is None:
            raise ValueError(
                'Could not infer input size from inputs.get_shape()[-1]')

        # The hyper cell sees both the input and the previous hidden output.
        x_hat = tf.concat([inputs, m_prev], 1)

        if self._hyper_cell is None:
            with vs.variable_scope('hyper_lstm'):
                self._hyper_cell = LSTMCell(self._hyper_num_units)
        h_out, new_hyper_state = self._hyper_cell(x_hat, hyper_state)

        W_xh = tf.get_variable('W_xh', [input_size, self._num_units * 4],
                               initializer=w_init)
        # Recurrent weights act on the previous hidden output, so they are
        # sized by num_units and multiplied with m_prev (not with inputs).
        W_hh = tf.get_variable('W_hh',
                               [self._num_units, self._num_units * 4],
                               initializer=w_init)
        bias = tf.get_variable('W_bias', [self._num_units * 4],
                               initializer=tf.constant_initializer(0.0))

        xh = tf.matmul(inputs, W_xh)
        hh = tf.matmul(m_prev, W_hh)

        # Input-to-hidden contributions, one slice per gate (i, j, f, o).
        ix, jx, fx, ox = tf.split(xh, 4, 1)
        ix = _hyper_norm(ix, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_ix')
        jx = _hyper_norm(jx, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_jx')
        fx = _hyper_norm(fx, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_fx')
        ox = _hyper_norm(ox, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_ox')

        # Hidden-to-hidden contributions.
        ih, jh, fh, oh = tf.split(hh, 4, 1)
        ih = _hyper_norm(ih, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_ih')
        jh = _hyper_norm(jh, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_jh')
        fh = _hyper_norm(fh, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_fh')
        oh = _hyper_norm(oh, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_oh')

        # Bias contributions.
        ib, jb, fb, ob = tf.split(bias, 4, 0)
        ib = _hyper_bias(ib, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_ib')
        jb = _hyper_bias(jb, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_jb')
        fb = _hyper_bias(fb, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_fb')
        ob = _hyper_bias(ob, h_out, self._hyper_embed_size, self._num_units,
                         'hyper_ob')

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i = ix + ih + ib
        j = jx + jh + jb
        f = fx + fh + fb
        o = ox + oh + ob

        # Layer-normalise every gate pre-activation; the normalisers are
        # created lazily and cached on the cell.
        if self._ln_i is None:
            self._ln_i = Layer_Normalization([self._num_units], scope='i_norm')
        if self._ln_j is None:
            self._ln_j = Layer_Normalization([self._num_units], scope='j_norm')
        if self._ln_f is None:
            self._ln_f = Layer_Normalization([self._num_units], scope='f_norm')
        if self._ln_o is None:
            self._ln_o = Layer_Normalization([self._num_units], scope='o_norm')
        i, j, f, o = self._ln_i(i), self._ln_j(j), self._ln_f(f), self._ln_o(o)

        # Lazily create the diagonal peephole weights and their normalisers.
        if self._use_peepholes and not self._w_f_diag:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                with vs.variable_scope(unit_scope):
                    self._w_f_diag = vs.get_variable("w_f_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_i_diag = vs.get_variable("w_i_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_o_diag = vs.get_variable("w_o_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    if self._ln_p1 is None:
                        self._ln_p1 = Layer_Normalization([self._num_units],
                                                          scope='p1_norm')
                    if self._ln_p2 is None:
                        self._ln_p2 = Layer_Normalization([self._num_units],
                                                          scope='p2_norm')
        if self._use_peepholes:
            peep1 = self._w_f_diag * c_prev
            peep2 = self._w_i_diag * c_prev
            # The forget gate must scale the previous cell state, exactly as
            # in the non-peephole branch below.
            c = (sigmoid(f + self._forget_bias + self._ln_p1(peep1)) * c_prev +
                 sigmoid(i + self._ln_p2(peep2)) * self._activation(j))
        else:
            c = sigmoid(f + self._forget_bias) * c_prev + sigmoid(
                i) * self._activation(j)
        if self._ln_c is None:
            self._ln_c = Layer_Normalization([self._num_units], scope='c_norm')
        c = self._ln_c(c)
        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            # Build the projection layer once and cache it under the same
            # attribute that is used to apply it.  getattr() tolerates a
            # constructor that never initialised `_linear2`.
            if getattr(self, '_linear2', None) is None:
                scope = vs.get_variable_scope()
                with vs.variable_scope(scope, initializer=self._initializer):
                    with vs.variable_scope("projection"):
                        self._linear2 = _Linear(m, self._num_proj, False)
            m = self._linear2(m)
            if self._proj_clip is not None:
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
        hyper_c, hyper_h = new_hyper_state
        new_state = LSTMStateTuple(tf.concat([c, hyper_c], 1),
                                   tf.concat([m, hyper_h], 1))
        return m, new_state
def _update_confusion_matrix_variables_optimized(
        variables_to_update,
        y_true,
        y_pred,
        thresholds,
        multi_label=False,
        sample_weights=None,
        label_weights=None,
        thresholds_with_epsilon=False):
    """Memory-efficient confusion-matrix update for evenly spaced thresholds.

    The thresholds must be evenly distributed, i.e. the difference between
    consecutive elements is constant.

    Idea.  For every threshold `t` the binary classifier
        C(t) = (predictions >= t)
    yields
        TP(t) = sum( C(t) * true_labels )
        FP(t) = sum( C(t) * false_labels )
    Evaluating C(t) separately per threshold is wasteful.  With sorted
    thresholds t_0 < ... < t_{n-1} (n = num_thresholds), define the bucket
    function
        B(i) = Sum( (predictions == t), t_i <= t < t_{i+1} )
    so that
        C(t_i) = sum( B(j), j >= i )
    which is a reversed cumulative sum (tf.cumsum with reverse=True).

    Because the thresholds are evenly spaced,
        width = 1.0 / (num_thresholds - 1)
        thresholds = [0.0, 1*width, 2*width, ..., 1.0]
    a prediction can be mapped straight to its bucket, and all buckets can
    be filled in a single pass with tf.math.unsorted_segment_sum().  A
    prediction has to be strictly greater than a bucket's lower bound to
    belong to it, so the index is computed as `ceil(p * (n - 1)) - 1`.

    Worked example:
        y_true = [0, 0, 1, 1]
        y_pred = [0.1, 0.5, 0.3, 0.9]
        thresholds = [0.0, 0.5, 1.0]
        y_pred * (num_thresholds - 1) = [0.2, 1.0, 0.6, 1.8]
        bucket_indices = ceil(...) - 1 = [0, 0, 0, 1]
        # 1.0 lands in bucket 0 because membership requires being strictly
        # above the lower bound.
        tp_bucket_value = unsorted_segment_sum(true_labels, bucket_indices,
                                               num_segments=num_thresholds)
                        = [1, 1, 0]
        # Larger buckets always count towards smaller thresholds, hence the
        # reversed cumulative sum [a + b + c, b + c, c]:
        true_positive = cumsum(tp_bucket_value, reverse=True)
                      = [2, 1, 0]

    Run time and space are O(T + N) for T thresholds and N predictions,
    whereas the standard implementation is O(T * N).

    Args:
        variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid
            keys and the variables to update as values.
        y_true: A floating point `Tensor` whose shape matches `y_pred`.  It
            is cast to `bool` (and back to its own dtype).
        y_pred: A floating point `Tensor` of arbitrary shape whose values
            are in the range `[0, 1]`.
        thresholds: A sorted floating point `Tensor` with values in
            `[0, 1]`; must be evenly distributed.
        multi_label: Optional boolean.  When True, multidimensional
            predictions/labels are treated as multilabel responses instead
            of being flattened; the values of `variables_to_update` must
            then have a second dimension equal to the number of labels in
            y_true and y_pred, and those tensors must not be RaggedTensors.
        sample_weights: Optional `Tensor` whose rank is either 0 or the rank
            of `y_true`, and which is broadcastable to `y_true`.
        label_weights: Optional tensor of non-negative weights for
            multilabel data.  The weights are applied when calculating TP,
            FP, FN, and TN without explicit multilabel handling (i.e. when
            the data is to be flattened).
        thresholds_with_epsilon: Optional boolean indicating whether the
            leading and trailing thresholds had an epsilon added for
            floating point imprecision.  It changes how the leading bucket
            is handled.

    Returns:
        Update op.
    """
    num_thresholds = thresholds.shape.as_list()[0]
    flatten = not multi_label

    # Resolve both weight sources to something multipliable.
    if sample_weights is not None:
        sample_weights = weights_broadcast_ops.broadcast_weights(
            math_ops.cast(sample_weights, dtype=y_pred.dtype), y_pred)
        if flatten:
            sample_weights = array_ops.reshape(sample_weights, [-1])
    else:
        sample_weights = 1.0

    if label_weights is not None:
        label_weights = weights_broadcast_ops.broadcast_weights(
            array_ops.expand_dims(label_weights, 0), y_pred)
        if flatten:
            label_weights = array_ops.reshape(label_weights, [-1])
    else:
        label_weights = 1.0

    weights = math_ops.multiply(sample_weights, label_weights)

    # Should be a no-op, but guards against predictions outside [0.0, 1.0].
    y_pred = clip_ops.clip_by_value(
        y_pred, clip_value_min=0.0, clip_value_max=1.0)

    # Round-trip through bool so that labels become exactly 0 or 1.
    y_true_as_bool = math_ops.cast(y_true, dtypes.bool)
    y_true = math_ops.cast(y_true_as_bool, y_true.dtype)
    if flatten:
        y_true = array_ops.reshape(y_true, [-1])
        y_pred = array_ops.reshape(y_pred, [-1])

    true_labels = math_ops.multiply(y_true, weights)
    false_labels = math_ops.multiply((1.0 - y_true), weights)

    # Bucket index per prediction.  A value must be strictly greater than
    # the threshold to fall into the next bucket (buckets look like
    # [0, 0.5], (0.5, 1], with 0.5 in the first), hence ceil(...) - 1.
    scaled_pred = y_pred * (num_thresholds - 1)
    bucket_indices = math_ops.ceil(scaled_pred) - 1

    if thresholds_with_epsilon:
        # Any prediction in [0.0, 1.0] is above the first threshold here,
        # so index -1 is moved into bucket 0.
        bucket_indices = nn_ops.relu(bucket_indices)

    bucket_indices = math_ops.cast(bucket_indices, dtypes.int32)

    def _sum_into_buckets(values, indices):
        return math_ops.unsorted_segment_sum(data=values,
                                             segment_ids=indices,
                                             num_segments=num_thresholds)

    if multi_label:
        # Labels are rank 2 here.  Put the label dimension first so the
        # per-class bucket sums can be computed in parallel.
        true_labels = array_ops.transpose_v2(true_labels)
        false_labels = array_ops.transpose_v2(false_labels)
        bucket_indices = array_ops.transpose_v2(bucket_indices)

        def gather_bucket(label_and_bucket_index):
            return _sum_into_buckets(label_and_bucket_index[0],
                                     label_and_bucket_index[1])

        tp_bucket_v = vectorized_map(gather_bucket,
                                     (true_labels, bucket_indices))
        fp_bucket_v = vectorized_map(gather_bucket,
                                     (false_labels, bucket_indices))
        tp = array_ops.transpose_v2(
            math_ops.cumsum(tp_bucket_v, reverse=True, axis=1))
        fp = array_ops.transpose_v2(
            math_ops.cumsum(fp_bucket_v, reverse=True, axis=1))
    else:
        tp_bucket_v = _sum_into_buckets(true_labels, bucket_indices)
        fp_bucket_v = _sum_into_buckets(false_labels, bucket_indices)
        tp = math_ops.cumsum(tp_bucket_v, reverse=True)
        fp = math_ops.cumsum(fp_bucket_v, reverse=True)

    # fn = sum(true_labels) - tp
    # tn = sum(false_labels) - fp
    # The totals are only needed when a negative count is requested.
    wants_negatives = (
        ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
        or ConfusionMatrix.FALSE_NEGATIVES in variables_to_update)
    if wants_negatives:
        totals_axis = 1 if multi_label else None
        total_true_labels = math_ops.reduce_sum(true_labels, axis=totals_axis)
        total_false_labels = math_ops.reduce_sum(false_labels,
                                                 axis=totals_axis)

    # Each increment is built lazily so that only requested ops are created.
    increments = (
        (ConfusionMatrix.TRUE_POSITIVES, lambda: tp),
        (ConfusionMatrix.FALSE_POSITIVES, lambda: fp),
        (ConfusionMatrix.TRUE_NEGATIVES, lambda: total_false_labels - fp),
        (ConfusionMatrix.FALSE_NEGATIVES, lambda: total_true_labels - tp),
    )
    update_ops = []
    for key, make_increment in increments:
        if key in variables_to_update:
            update_ops.append(
                variables_to_update[key].assign_add(make_increment()))
    return control_flow_ops.group(update_ops)
Beispiel #33
0
    def call(self, inputs, state):
        """Advance the LSTM by one step.

        Args:
            inputs: 2-D input tensor whose second dimension must be
                statically known.
            state: pair `(c, h)` of the previous cell state and hidden
                output.

        Returns:
            A pair `(new_h, new_state)` where `new_state` is
            `LSTMStateTuple(new_c, new_h)`.

        Raises:
            ValueError: if the input size cannot be inferred from the static
                shape of `inputs`.
        """
        dtype = inputs.dtype
        num_units = self._num_units
        sigmoid = math_ops.sigmoid
        cell_prev, hidden_prev = state

        if inputs.get_shape().with_rank(2)[1].value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        with vs.variable_scope(self._scope, initializer=self._initializer):
            gate_inputs = self._linear([inputs, hidden_prev],
                                       4 * num_units,
                                       norm=self._norm,
                                       bias=True)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(value=gate_inputs,
                                         num_or_size_splits=4,
                                         axis=1)

            forget_pre = f + self._forget_bias
            input_pre = i
            if self._use_peepholes:
                # Diagonal peephole weights, created in f, i, o order.
                peephole = {
                    var_name: vs.get_variable(var_name,
                                              shape=[num_units],
                                              dtype=dtype)
                    for var_name in ("w_f_diag", "w_i_diag", "w_o_diag")
                }
                forget_pre = forget_pre + peephole["w_f_diag"] * cell_prev
                input_pre = input_pre + peephole["w_i_diag"] * cell_prev

            new_c = (cell_prev * sigmoid(forget_pre) +
                     sigmoid(input_pre) * self._activation(j))

            if self._cell_clip is not None:
                new_c = clip_ops.clip_by_value(new_c, -self._cell_clip,
                                               self._cell_clip)

            # The output peephole looks at the (possibly clipped) new cell.
            if self._use_peepholes:
                output_pre = o + peephole["w_o_diag"] * new_c
            else:
                output_pre = o
            new_h = sigmoid(output_pre) * self._activation(new_c)

            if self._num_proj is not None:
                with vs.variable_scope("projection"):
                    new_h = self._linear(new_h,
                                         self._num_proj,
                                         norm=self._norm,
                                         bias=False)

                if self._proj_clip is not None:
                    new_h = clip_ops.clip_by_value(new_h, -self._proj_clip,
                                                   self._proj_clip)

            return new_h, LSTMStateTuple(new_c, new_h)
Beispiel #34
0
 def safe_polygamma(x, y):
     """Call polygamma with both arguments forced into a benign range.

     The first polygamma argument is `y` clipped to [1, 10] and rounded;
     the second is `x * x + 1`, which is never below 1.
     """
     first_arg = math_ops.round(clip_ops.clip_by_value(y, 1, 10))
     second_arg = x * x + 1
     return math_ops.polygamma(first_arg, second_arg)
Beispiel #35
0
    def call(self, inputs, state):
        """Run one step of LSTM.

        Args:
            inputs: input Tensor, 2D, batch x num_units.
            state: A tuple of state Tensors, both `2-D`, with column sizes
                `c_state` and `m_state`.

        Returns:
            A tuple containing:
            - A `2-D, [batch x output_dim]` Tensor holding the LSTM output
              after reading `inputs` given the previous `state`, where
              output_dim is num_proj if num_proj was set and num_units
              otherwise.
            - The new LSTM state after reading `inputs`, with the same type
              and shape(s) as `state`.

        Raises:
            ValueError: If input size cannot be inferred from inputs via
                static shape inference.
        """
        dtype = inputs.dtype
        num_units = self._num_units
        sigmoid = math_ops.sigmoid
        c, h = state

        static_input_dim = inputs.get_shape().with_rank(2)[1]
        if static_input_dim.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        def _peephole_weight(var_name):
            return vs.get_variable(var_name, shape=[num_units], dtype=dtype)

        with vs.variable_scope(self._scope, initializer=self._initializer):
            projected = self._linear([inputs, h],
                                     4 * num_units,
                                     norm=self._norm,
                                     bias=True)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(value=projected,
                                         num_or_size_splits=4,
                                         axis=1)

            forget_logits = f + self._forget_bias
            input_logits = i
            if self._use_peepholes:
                w_f_diag, w_i_diag, w_o_diag = (
                    _peephole_weight(var_name)
                    for var_name in ("w_f_diag", "w_i_diag", "w_o_diag"))
                forget_logits = forget_logits + w_f_diag * c
                input_logits = input_logits + w_i_diag * c

            new_c = (c * sigmoid(forget_logits) +
                     sigmoid(input_logits) * self._activation(j))

            if self._cell_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                new_c = clip_ops.clip_by_value(new_c, -self._cell_clip,
                                               self._cell_clip)
                # pylint: enable=invalid-unary-operand-type

            # The output gate peeks at the updated (possibly clipped) cell.
            output_logits = o + w_o_diag * new_c if self._use_peepholes else o
            new_h = sigmoid(output_logits) * self._activation(new_c)

            if self._num_proj is not None:
                with vs.variable_scope("projection"):
                    new_h = self._linear(new_h,
                                         self._num_proj,
                                         norm=self._norm,
                                         bias=False)

                if self._proj_clip is not None:
                    # pylint: disable=invalid-unary-operand-type
                    new_h = clip_ops.clip_by_value(new_h, -self._proj_clip,
                                                   self._proj_clip)
                    # pylint: enable=invalid-unary-operand-type

            return new_h, rnn_cell_impl.LSTMStateTuple(new_c, new_h)
Beispiel #36
0
    def __call__(self, inputs, state, scope=None, ab=None):
        """Run one step of the scaled-input LSTM ("ELSTM") cell.

        Args:
            inputs: 2-D input tensor whose second dimension must be
                statically known.
            state: previous state.  A `(c, m)` tuple when
                `self._state_is_tuple` is true; otherwise a single 2-D
                tensor holding `c` in its first `num_units` columns followed
                by `m`.
            scope: optional variable scope; defaults to the class name.
            ab: indexable whose first element is the scaling factor applied
                to the input-gate contribution of the new cell state.
                Although it defaults to `None` in the signature, a value is
                required.

        Returns:
            A pair `(m, new_state)`; `new_state` is an `LSTMStateTuple` or
            the concatenation of `c` and `m`, mirroring the layout of
            `state`.

        Raises:
            TypeError: if `ab` is `None`.
            ValueError: if the input size cannot be inferred from the static
                shape of `inputs`.
        """
        # Fail before any variable is created; indexing None further down
        # would raise the same exception type with a far less useful message.
        if ab is None:
            raise TypeError(
                "`ab` is required: expected an indexable whose first element "
                "is the scaling factor, got None")

        num_proj = self._num_units if self._num_proj is None else self._num_proj

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")
        with vs.variable_scope(scope or type(self).__name__,
                               initializer=self._initializer):  # "LSTMCell"

            gnum = 4
            if not self._no_previous:
                lstm_w_len = input_size.value + num_proj
                # cell inputs is Xt(inputs) and Yt-1(m_prev)
                cell_inputs = array_ops.concat([inputs, m_prev], 1)
            else:
                # The previous output is ignored; only Xt feeds the gates.
                lstm_w_len = input_size.value
                cell_inputs = array_ops.concat([inputs], 1)

            concat_w = _get_concat_variable(
                "W", [lstm_w_len, gnum * self._num_units], dtype,
                self._num_unit_shards)

            b = vs.get_variable("B",
                                shape=[gnum * self._num_units],
                                initializer=init_ops.zeros_initializer,
                                dtype=dtype)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            lstm_matrix = nn_ops.bias_add(
                math_ops.matmul(cell_inputs, concat_w), b)
            i, j, f, o = array_ops.split(lstm_matrix, gnum, axis=1)
            # NOTE(review): `sigmoid` is not bound inside this method;
            # presumably it is imported at module level - confirm.
            # ab[0] scales the input-gate term of the cell update.
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 ab[0] * sigmoid(i) * self._activation(j))

            if self._cell_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                c = clip_ops.clip_by_value(c, -self._cell_clip,
                                           self._cell_clip)
                # pylint: enable=invalid-unary-operand-type

            m = sigmoid(o) * self._activation(c)
            # end of ELSTM major operation

            if self._num_proj is not None:
                concat_w_proj = _get_concat_variable(
                    "W_P", [self._num_units, self._num_proj], dtype,
                    self._num_proj_shards)

                m = math_ops.matmul(m, concat_w_proj)
                if self._proj_clip is not None:
                    # pylint: disable=invalid-unary-operand-type
                    m = clip_ops.clip_by_value(m, -self._proj_clip,
                                               self._proj_clip)
                    # pylint: enable=invalid-unary-operand-type
            new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else
                         array_ops.concat([c, m], 1))
        return m, new_state
Beispiel #37
0
    def __call__(self, inputs, state, scope=None):
        """Run one step of an LSTM whose output is gated against a linear
        transform of the input built under the "highway" variable scope.

        Besides the usual i/j/f/o slices, the gate projection yields a fifth
        slice `g`.  The output is
            m = sigmoid(g) * sigmoid(o [+ peephole]) * activation(c)
                + (1 - sigmoid(g)) * k
        where `k` is the linear transform of `inputs`.

        Args:
            inputs: 2-D input tensor whose second dimension must be
                statically known.
            state: previous state.  A `(c, m)` tuple when
                `self._state_is_tuple` is true; otherwise a single 2-D
                tensor holding `c` in its first `num_units` columns followed
                by `m`.
            scope: accepted for interface compatibility; the value passed in
                is not used.

        Returns:
            A pair `(m, new_state)`; `new_state` is an `LSTMStateTuple` or
            the concatenation of `c` and `m`, mirroring the layout of
            `state`.

        Raises:
            ValueError: if the input size cannot be inferred from the static
                shape of `inputs`.
        """
        num_proj = self._num_units if self._num_proj is None else self._num_proj
        sigmoid = math_ops.sigmoid

        if not self._state_is_tuple:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])
        else:
            c_prev, m_prev = state

        dtype = inputs.dtype
        if inputs.get_shape().with_rank(2)[1].value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        # Lazily build the joint gate projection (five slices per unit).
        if self._linear1 is None:
            with vs.variable_scope(
                    vs.get_variable_scope(),
                    initializer=self._initializer) as unit_scope:
                if self._num_unit_shards is not None:
                    unit_scope.set_partitioner(
                        partitioned_variables.fixed_size_partitioner(
                            self._num_unit_shards))
                self._linear1 = _Linear([inputs, m_prev], 5 * self._num_units,
                                        True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate,
        # g = gate blending the LSTM output with the "highway" term k.
        gate_inputs = self._linear1([inputs, m_prev])
        i, j, f, o, g = array_ops.split(value=gate_inputs,
                                        num_or_size_splits=5,
                                        axis=1)

        with tf.variable_scope("highway"):
            highway_input = _linear([inputs], self._num_units, True)
            # split() with a single piece yields a one-element list; it is
            # deliberately kept as a list (see the squeeze further down).
            k = array_ops.split(value=highway_input,
                                num_or_size_splits=1,
                                axis=1)

        # Diagonal connections
        if self._use_peepholes and not self._w_f_diag:
            with vs.variable_scope(
                    vs.get_variable_scope(),
                    initializer=self._initializer) as unit_scope:
                with vs.variable_scope(unit_scope):
                    for var_name in ("w_f_diag", "w_i_diag", "w_o_diag"):
                        setattr(
                            self, "_" + var_name,
                            vs.get_variable(var_name,
                                            shape=[self._num_units],
                                            dtype=dtype))

        forget_pre = f + self._forget_bias
        input_pre = i
        if self._use_peepholes:
            forget_pre = forget_pre + self._w_f_diag * c_prev
            input_pre = input_pre + self._w_i_diag * c_prev
        c = (sigmoid(forget_pre) * c_prev +
             sigmoid(input_pre) * self._activation(j))

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type

        sigmoidg = sigmoid(g)
        su = 1. - sigmoidg
        if self._use_peepholes:
            output_pre = o + self._w_o_diag * c
        else:
            output_pre = o
        # `su * k` multiplies a tensor by the one-element list `k`, which
        # presumably adds a leading axis of size 1 that squeeze(axis=0)
        # removes again.
        m = (sigmoidg * sigmoid(output_pre) * self._activation(c) +
             tf.squeeze(su * k, axis=0))

        if self._num_proj is not None:
            if self._linear2 is None:
                with vs.variable_scope(vs.get_variable_scope(),
                                       initializer=self._initializer):
                    with vs.variable_scope("projection") as proj_scope:
                        if self._num_proj_shards is not None:
                            proj_scope.set_partitioner(
                                partitioned_variables.fixed_size_partitioner(
                                    self._num_proj_shards))
                        self._linear2 = _Linear(m, self._num_proj, False)
            m = self._linear2(m)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        if self._state_is_tuple:
            new_state = LSTMStateTuple(c, m)
        else:
            new_state = array_ops.concat([c, m], 1)

        return m, new_state
Beispiel #38
0
    def __call__(self, inputs, state, scope=None):
        """Advance the LSTM cell by one time step.

        Args:
          inputs: rank-2 input Tensor; its second dimension must be
            statically known.
          state: the previous state.  When `state_is_tuple` is True this is a
            `(c_prev, m_prev)` pair.  Otherwise it is a single rank-2 Tensor
            whose first `num_units` columns hold `c_prev`, followed by the
            columns of `m_prev`.
          scope: VariableScope for the created subgraph; defaults to the
            class name.

        Returns:
          A pair `(m, new_state)`:
          - `m` is the cell output; its width is `num_proj` when a projection
            is configured and `num_units` otherwise.
          - `new_state` is `LSTMStateTuple(c, m)` when `state_is_tuple` is
            True, else `c` and `m` concatenated along axis 1.

        Raises:
          ValueError: if the second dimension of `inputs` cannot be
            determined statically.
        """
        if self._num_proj is None:
            out_width = self._num_units
        else:
            out_width = self._num_proj

        # Recover the previous cell state and previous output.
        if not self._state_is_tuple:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, out_width])
        else:
            c_prev, m_prev = state

        dtype = inputs.dtype
        input_dim = inputs.get_shape().with_rank(2)[1]
        if input_dim.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        scope_name = scope or type(self).__name__  # "LSTMCell"
        with vs.variable_scope(scope_name, initializer=self._initializer):
            # One fused weight matrix and bias for all four gates.
            kernel = _get_concat_variable(
                "W", [input_dim.value + out_width, 4 * self._num_units],
                dtype, self._num_unit_shards)
            bias = vs.get_variable("B",
                                   shape=[4 * self._num_units],
                                   initializer=init_ops.zeros_initializer,
                                   dtype=dtype)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            stacked = array_ops.concat([inputs, m_prev], 1)
            pre_activations = nn_ops.bias_add(
                math_ops.matmul(stacked, kernel), bias)
            i, j, f, o = array_ops.split(pre_activations, 4, 1)

            if self._use_peepholes:
                # Diagonal (peephole) connections from the cell state.
                w_f_diag = vs.get_variable("W_F_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_i_diag = vs.get_variable("W_I_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_o_diag = vs.get_variable("W_O_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                kept = sigmoid(
                    f + self._forget_bias + w_f_diag * c_prev) * c_prev
                written = sigmoid(
                    i + w_i_diag * c_prev) * self._activation(j)
            else:
                kept = sigmoid(f + self._forget_bias) * c_prev
                written = sigmoid(i) * self._activation(j)
            c = kept + written

            if self._cell_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                c = clip_ops.clip_by_value(c, -self._cell_clip,
                                           self._cell_clip)
                # pylint: enable=invalid-unary-operand-type

            if self._use_peepholes:
                out_gate = sigmoid(o + w_o_diag * c)
            else:
                out_gate = sigmoid(o)
            m = out_gate * self._activation(c)

            if self._num_proj is not None:
                # Optional linear projection of the output, then clipping.
                proj_kernel = _get_concat_variable(
                    "W_P", [self._num_units, self._num_proj], dtype,
                    self._num_proj_shards)
                m = math_ops.matmul(m, proj_kernel)
                if self._proj_clip is not None:
                    # pylint: disable=invalid-unary-operand-type
                    m = clip_ops.clip_by_value(m, -self._proj_clip,
                                               self._proj_clip)
                    # pylint: enable=invalid-unary-operand-type

        if self._state_is_tuple:
            new_state = LSTMStateTuple(c, m)
        else:
            new_state = array_ops.concat([c, m], 1)
        return m, new_state
Beispiel #39
0
    def call(self, inputs, state, training=None):
        """Run one LSTM step, optionally adding a residual connection.

        Args:
          inputs: rank-2 input Tensor whose second dimension is statically
            known.
          state: two-element sequence `[m_prev, c_prev]` -- the previous
            output first, the previous cell state second.
          training: not used by this method.

        Returns:
          A pair `(output, [m, c])`:
          - `output` is the cell output `m` (after the optional projection
            and clipping), plus `inputs` when `self.residual_connection` is
            truthy.
          - `[m, c]` is the new state, in the same order as `state`.

        Raises:
          ValueError: if the second dimension of `inputs` cannot be
            determined statically.
        """
        sigmoid = math_ops.sigmoid

        [m_prev, c_prev] = state
        input_size = inputs.get_shape().with_rank(2).dims[1].value
        if input_size is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        lstm_matrix = tf.matmul(array_ops.concat([inputs, m_prev], 1),
                                self._kernel)
        lstm_matrix = nn_ops.bias_add(lstm_matrix, self._bias)

        i, j, f, o = array_ops.split(value=lstm_matrix,
                                     num_or_size_splits=4,
                                     axis=1)
        # Diagonal (peephole) connections feed the cell state into the gates.
        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) *
                 c_prev +
                 sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * self._activation(j))

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type
        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            m = math_ops.matmul(m, self._proj_kernel)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        # Keep the returned value distinct from the output gate `o` above.
        output = m
        if self.residual_connection:
            # NOTE(review): assumes `inputs` and `m` have addable shapes --
            # confirm against how the cell is constructed.
            output = output + inputs
        return output, [m, c]
Beispiel #40
0
    def __call__(self, inputs, state, scope=None):
        """Run one step of the LSTM across every frequency block.

        Args:
          inputs: input Tensor; its first (batch) dimension must be
            statically known.  It is split into per-frequency features by
            `self._make_tf_features`.
          state: rank-2 state Tensor.  For frequency block `fq`, columns
            `[2*fq*num_units, (2*fq+1)*num_units)` hold the cell state and
            the following `num_units` columns hold the output.
          scope: VariableScope for the created subgraph; defaults to the
            class name.

        Returns:
          A pair `(m_out, state_out)`:
          - `m_out`: the outputs of all frequency blocks concatenated along
            axis 1.
          - `state_out`: the `c, m` pairs of all frequency blocks
            concatenated along axis 1, in the same layout as `state`.

        NOTE(review): the previous docstring listed a ValueError for a
        mismatching `input_size`; no such check is visible in this method, so
        it presumably originates in `_make_tf_features` -- confirm.
        """
        sigmoid = math_ops.sigmoid
        tanh = math_ops.tanh

        freq_inputs = self._make_tf_features(inputs)
        dtype = inputs.dtype
        actual_input_size = freq_inputs[0].get_shape().as_list()[1]
        units = self._num_units

        scope_name = scope or type(self).__name__  # "TimeFreqLSTMCell"
        with vs.variable_scope(scope_name, initializer=self._initializer):
            # The same weights are shared by every frequency block.
            concat_w = _get_concat_variable(
                "W", [actual_input_size + 2 * units, 4 * units],
                dtype, self._num_unit_shards)
            b = vs.get_variable("B",
                                shape=[4 * units],
                                initializer=array_ops.zeros_initializer,
                                dtype=dtype)

            # Diagonal connections
            if self._use_peepholes:
                w_f_diag = vs.get_variable("W_F_diag",
                                           shape=[units],
                                           dtype=dtype)
                w_i_diag = vs.get_variable("W_I_diag",
                                           shape=[units],
                                           dtype=dtype)
                w_o_diag = vs.get_variable("W_O_diag",
                                           shape=[units],
                                           dtype=dtype)

            # The first frequency block sees an all-zero "previous frequency"
            # output.
            batch = int(inputs.get_shape()[0])
            m_prev_freq = array_ops.zeros([batch, units], dtype)

            for fq, freq_input in enumerate(freq_inputs):
                c_offset = 2 * fq * units
                m_offset = (2 * fq + 1) * units
                c_prev = array_ops.slice(state, [0, c_offset], [-1, units])
                m_prev = array_ops.slice(state, [0, m_offset], [-1, units])

                # i = input_gate, j = new_input, f = forget_gate, o = output_gate
                cell_inputs = array_ops.concat(
                    1, [freq_input, m_prev, m_prev_freq])
                lstm_matrix = nn_ops.bias_add(
                    math_ops.matmul(cell_inputs, concat_w), b)
                i, j, f, o = array_ops.split(1, 4, lstm_matrix)

                if self._use_peepholes:
                    kept = sigmoid(
                        f + self._forget_bias + w_f_diag * c_prev) * c_prev
                    written = sigmoid(i + w_i_diag * c_prev) * tanh(j)
                else:
                    kept = sigmoid(f + self._forget_bias) * c_prev
                    written = sigmoid(i) * tanh(j)
                c = kept + written

                if self._cell_clip is not None:
                    # pylint: disable=invalid-unary-operand-type
                    c = clip_ops.clip_by_value(c, -self._cell_clip,
                                               self._cell_clip)
                    # pylint: enable=invalid-unary-operand-type

                if self._use_peepholes:
                    m = sigmoid(o + w_o_diag * c) * tanh(c)
                else:
                    m = sigmoid(o) * tanh(c)

                # This block's output feeds the next frequency block.
                m_prev_freq = m
                if fq != 0:
                    state_out = array_ops.concat(1, [state_out, c, m])
                    m_out = array_ops.concat(1, [m_out, m])
                else:
                    state_out = array_ops.concat(1, [c, m])
                    m_out = m
        return m_out, state_out
Beispiel #41
0
    def __call__(self, inputs, state, scope=None):
        """Run one step of the phased LSTM.

        Args:
          inputs: rank-2 input Tensor whose second dimension is statically
            known.  The LAST column is taken as the time stamp; the remaining
            columns are the features fed to the LSTM gates.
          state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, batch x state_size`.  If `state_is_tuple` is True, this must
            be a tuple of state Tensors `(c_prev, m_prev)`, both `2-D`.
          scope: VariableScope for the created subgraph; defaults to the
            class name.

        Returns:
          A tuple `(m, new_state)`:
          - `m`: the output after the time gate has blended the freshly
            computed output with `m_prev`.
          - `new_state`: `LSTMStateTuple(c, m)` when `state_is_tuple` is True,
            otherwise `c` and `m` concatenated along axis 1.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
        """
        num_proj = self._num_units if self._num_proj is None else self._num_proj

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
        with vs.variable_scope(scope or type(self).__name__,
                               initializer=self._initializer):  # "LSTMCell"
            i_size = input_size.value - 1  # -1 to extract time
            times = array_ops.slice(inputs, [0, i_size], [-1, 1])
            filtered_inputs = array_ops.slice(inputs, [0, 0], [-1, i_size])

            # --------------------------------------- #
            # ------------- PHASED LSTM ------------- #
            # ---------------- BEGIN ---------------- #
            # --------------------------------------- #

            # Period of the oscillation: constant when set manually,
            # otherwise drawn by `random_exp_initializer`.
            if self.manual_set:
                tau_initializer = init_ops.constant_initializer(self.tau_init)
            else:
                tau_initializer = random_exp_initializer(0, self.tau_init)
            tau = vs.get_variable(
                "T", shape=[self._num_units],
                initializer=tau_initializer,
                trainable=self.trainable, dtype=dtype)

            # Fraction of the period during which the gate is open.
            r_on = vs.get_variable(
                "R", shape=[self._num_units],
                initializer=init_ops.constant_initializer(self.r_on_init),
                trainable=self.trainable, dtype=dtype)

            # Phase shift: zero when set manually, otherwise uniform in
            # [0, tau).
            # for backward compatibility (v < 0.12.0) use
            # init_ops.random_uniform_initializer(0., tau) instead.
            if self.manual_set:
                s_initializer = init_ops.constant_initializer(0.)
            else:
                s_initializer = init_ops.random_uniform_initializer(
                    0., tau.initialized_value())
            s = vs.get_variable(
                "S", shape=[self._num_units],
                initializer=s_initializer,
                trainable=self.trainable, dtype=dtype)

            tau_broadcast = tf.expand_dims(tau, dim=0)
            r_on_broadcast = tf.expand_dims(r_on, dim=0)
            s_broadcast = tf.expand_dims(s, dim=0)

            r_on_broadcast = tf.abs(r_on_broadcast)
            tau_broadcast = tf.abs(tau_broadcast)
            times = tf.tile(times, [1, self._num_units])

            # calculate kronos gate
            # phi is the position inside the current period, scaled by tau.
            phi = tf.div(tf.mod(tf.mod(times - s_broadcast, tau_broadcast) + tau_broadcast, tau_broadcast),
                         tau_broadcast)
            is_up = tf.less(phi, (r_on_broadcast * 0.5))
            is_down = tf.logical_and(tf.less(phi, r_on_broadcast), tf.logical_not(is_up))

            # when manually setting, hard on over r_on, else as previous
            if self.manual_set:
                # NOTE(review): as written, k is 1 only where `is_up` and 0
                # where `is_down`; confirm that is the intended "hard on".
                k = tf.select(tf.logical_or(is_up, is_down), tf.to_float(is_up), self.alpha * phi)
            else:
                # Linear ramp up, linear ramp down, then a leak of alpha * phi.
                k = tf.select(is_up, phi / (r_on_broadcast * 0.5),
                              tf.select(is_down, 2. - 2. * (phi / r_on_broadcast), self.alpha * phi))

            # --------------------------------------- #
            # ------------- PHASED LSTM ------------- #
            # ----------------- END ----------------- #
            # --------------------------------------- #

            concat_w = _get_concat_variable(
                "W", [i_size + num_proj, 4 * self._num_units],
                dtype, self._num_unit_shards)

            b = vs.get_variable(
                "B", shape=[4 * self._num_units],
                initializer=init_ops.zeros_initializer, dtype=dtype)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            cell_inputs = array_ops.concat(1, [filtered_inputs, m_prev])
            lstm_matrix = nn_ops.bias_add(math_ops.matmul(cell_inputs, concat_w), b)
            i, j, f, o = array_ops.split(1, 4, lstm_matrix)

            # Diagonal connections
            if self._use_peepholes:
                w_f_diag = vs.get_variable(
                    "W_F_diag", shape=[self._num_units], dtype=dtype)
                w_i_diag = vs.get_variable(
                    "W_I_diag", shape=[self._num_units], dtype=dtype)
                w_o_diag = vs.get_variable(
                    "W_O_diag", shape=[self._num_units], dtype=dtype)

            if self._use_peepholes:
                c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
                     sigmoid(i + w_i_diag * c_prev) * self._activation(j))
            else:
                c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
                     self._activation(j))

            if self._cell_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
                # pylint: enable=invalid-unary-operand-type

            if self._use_peepholes:
                m = sigmoid(o + w_o_diag * c) * self._activation(c)
            else:
                m = sigmoid(o) * self._activation(c)

            if self._num_proj is not None:
                concat_w_proj = _get_concat_variable(
                    "W_P", [self._num_units, self._num_proj],
                    dtype, self._num_proj_shards)

                # Use the imported `math_ops` module, as the gate matmul
                # above does; `tf.math_ops` is not a public attribute.
                m = math_ops.matmul(m, concat_w_proj)
                if self._proj_clip is not None:
                    # pylint: disable=invalid-unary-operand-type
                    m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
                    # pylint: enable=invalid-unary-operand-type

            # APPLY KRONOS GATE
            # Where k is 0 the previous state passes through unchanged.
            c = k * c + (1. - k) * c_prev
            m = k * m + (1. - k) * m_prev
            # END KRONOS GATE

        new_state = (LSTMStateTuple(c, m) if self._state_is_tuple
                     else array_ops.concat(1, [c, m]))
        return m, new_state
Beispiel #42
0
    def call(self, inputs, state):
        """Run one step of LSTM, with optional weight dropout and quantisation hooks.

        Args:
          inputs: rank-2 input Tensor whose second dimension is statically
            known.
          state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, [batch, state_size]`.  If `state_is_tuple` is True, this
            must be a tuple of state Tensors `(c_prev, m_prev)`, both `2-D`.

        Returns:
          A tuple `(m, new_state)`:
          - `m`: the cell output; its width is `num_proj` when a projection
            is configured and `num_units` otherwise.
          - `new_state`: `LSTMStateTuple(c, m)` when `state_is_tuple` is True,
            otherwise `c` and `m` concatenated along axis 1.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.

        Side effects:
          When `self._is_quant` is set, `self._activation` is overwritten
          with `tf.nn.relu`, intermediate tensors are recorded in
          `self._quant_ops`, and `insert_quant_ops` is invoked on them.
        """
        num_proj = self._num_units if self._num_proj is None else self._num_proj

        sigmoid = math_ops.sigmoid
        if self._is_quant:
            # Quantised mode swaps in relu / hard_sigmoid.  Note this
            # permanently replaces the instance's activation.
            self._activation = tf.nn.relu
            sigmoid = hard_sigmoid

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            # Flat state layout: cell state columns first, then the output.
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        with tf.variable_scope('w_matmul_b_add'):
            # The identity op gives the kernel a stable name inside this scope.
            _kernel = tf.identity(self._kernel, name='weights')

            # Weight dropout is applied only when the keep rate is not 1.0.
            if not self._weight_drop_kr == 1.0:
                if self._use_vd:
                    with tf.variable_scope('var_weight_drop_connect'):
                        _kernel = self.vd(_kernel)

                else:
                    with tf.variable_scope('weight_drop_connect'):
                        # Flatten, drop individual weights, restore the
                        # original 2-D shape.
                        w1, w2 = _kernel.get_shape().as_list()
                        _kernel = tf.reshape(_kernel, [-1])
                        _kernel = dropout(_kernel,
                                          keep_prob=self._weight_drop_kr)
                        _kernel = tf.reshape(_kernel, [w1, w2])

            lstm_matrix = math_ops.matmul(
                array_ops.concat([inputs, m_prev], 1), _kernel)

            lstm_matrix = nn_ops.bias_add(lstm_matrix, self._bias)

            if self._is_quant:
                self._quant_ops['lstm_matrix'] = lstm_matrix

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=lstm_matrix,
                                     num_or_size_splits=4,
                                     axis=1)

        if self._is_quant:
            self._quant_ops['i'] = i

        # Unlike a plain LSTM cell, the input gate also receives an explicit
        # bias (`self._input_bias`).
        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) *
                 c_prev +
                 sigmoid(i + self._input_bias + self._w_i_diag * c_prev) *
                 self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i + self._input_bias) * self._activation(j))

        if self._cell_clip is not None:
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            # `scope` is bound here but not used afterwards.
            with tf.variable_scope('projection') as scope:
                _proj_kernel = tf.identity(self._proj_kernel, name='weights')
                m = math_ops.matmul(m, _proj_kernel)

            if self._proj_clip is not None:
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)

            if self._is_quant:
                self._quant_ops['proj_kernel'] = _proj_kernel

        # Name the final tensors so they can be located in the graph.
        c = tf.identity(c, name='end_c')
        m = tf.identity(m, name='end_m')

        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat([c, m], 1))

        if self._is_quant:
            insert_quant_ops(self._quant_ops, is_train=self._is_train)

        return m, new_state
Beispiel #43
0
def arcsinh(x, name="arcsinh"):
  """Numerically stable inverse hyperbolic sine.

  Mathematically, `arcsinh(x) = log(x + sqrt(x**2 + 1))`, and for every
  `x in (-inf, inf)`, `arcsinh(sinh(x)) = sinh(arcsinh(x)) = x`.

  Args:
    x:  Numeric `Tensor`.
    name:  A string name to prepend to created Ops.

  Returns:
    Numeric `Tensor` of same `shape` and `dtype` as `x`.
  """
  with ops.name_scope(name):
    x = ops.convert_to_tensor(x, name="x")
    finfo = np.finfo(x.dtype.as_numpy_dtype)

    # Strategy: build three approximations of z := x + sqrt(x**2 + 1), one
    # per range of x, stitch them together with `where`, and return log(z).
    # NaNs in the branch that `where` does NOT select still propagate through
    # its gradient, so every approximation is built from clipped copies of x
    # to keep it finite over the whole input range.

    # Range 1: |x| < 1.  The direct formula is fine here.  Away from zero it
    # breaks: for very negative x, x**2 + 1 == x**2 numerically, so z == 0
    # and log(z) == -inf; for x > sqrt(finfo.max), x**2 overflows.  Hence the
    # squared term uses x clipped to [-1, 1].
    x_unit_clipped = clip_ops.clip_by_value(x, -1., 1.)
    in_unit_interval = math_ops.abs(x) < 1.
    z_unit = x + math_ops.sqrt(x_unit_clipped**2 + 1)

    # Cutoffs.
    # Important!  Keep these in sync with the tests, which use cutoffs of the
    # exact same name.
    # very_big_cutoff**2 = finfo.max, the maximum representable value.
    # very_small_cutoff could have been -1 / sqrt(eps), the point past which
    # 1 / x**2 + 1 == 1 numerically.  The 0.01 factor was added to match
    # numpy in 32bit as closely as possible; anything < 1 should be stable.
    very_small_cutoff = -0.01 / np.sqrt(finfo.eps)
    very_big_cutoff = np.sqrt(finfo.max)

    # Range 2: very_small_cutoff < x <= -1 or 1 <= x < very_big_cutoff.  Use
    #   sqrt(x**2 + 1) = |x| * sqrt(1 + 1 / x**2).
    # This breaks at x == 0 (division by zero), when x**2 overflows, and for
    # very negative x (1 + 1 / x**2 == 1, giving z = x + |x| = 0).  So the
    # value inside the square root is pushed out of (-1, 1) and clipped to
    # the cutoffs.
    x_outside_unit = array_ops.where(
        x >= 0.,
        math_ops.maximum(x, 1.),
        math_ops.minimum(x, -1.))
    x_bounded = clip_ops.clip_by_value(
        x_outside_unit, very_small_cutoff, very_big_cutoff)
    magnitude = math_ops.abs(x)
    z_moderate = x + magnitude * math_ops.sqrt(1. + 1. / x_bounded**2)

    # Range 3: x < very_small_cutoff.  First-order Taylor expansion
    #   sqrt(1 + 1 / x**2) ~= 1 + 1 / (2 * x**2),
    # which for negative x leaves z ~= 1 / (2 * |x|).  Using the
    # away-from-zero copy of x avoids dividing by zero.
    below_small_cutoff = x < very_small_cutoff
    z_very_negative = 1 / (2. * math_ops.abs(x_outside_unit))

    # Stitch the pieces together: range 3 vs. range 2 first, then range 1.
    z_outside_unit = array_ops.where(
        below_small_cutoff, z_very_negative, z_moderate)
    z = array_ops.where(in_unit_interval, z_unit, z_outside_unit)

    return math_ops.log(z)
Beispiel #44
0
 def loop_fn(i):
     """Gather slice `i` of the enclosing-scope `x` and clip it to [0.5, 1.0]."""
     return clip_ops.clip_by_value(array_ops.gather(x, i), 0.5, 1.0)
Beispiel #45
0
def _default_model_perception(img_in):
    """Apply the five-layer ReLU convolution stack to `img_in` and flatten."""
    x = img_in
    # (filters, kernel edge, stride) for each convolution, in order.
    for filters, kernel, stride in ((24, 5, 2), (32, 5, 2), (64, 5, 2),
                                    (64, 3, 2), (64, 3, 1)):
        x = Convolution2D(filters=filters,
                          kernel_size=(kernel, kernel),
                          strides=(stride, stride),
                          activation='relu')(x)
    return Flatten(name='flattened')(x)


def _default_model_dense_block(x, units):
    """Apply Dense(units) -> Dropout(0.5) -> ReLU to `x`."""
    x = Dense(units)(x)
    x = Dropout(0.5)(x)
    return Activation('relu')(x)


def default_model(num_action, input_shape, actor_critic='actor'):
    """Build the actor or the critic Keras model.

    Args:
        num_action: number of units of the actor's `mu` and `std` heads.
        input_shape: shape of the image input (without the batch dimension).
        actor_critic: `'actor'` or `'critic'`, selecting which model to build.

    Returns:
        For `'actor'`: a `Model` taking `[img_in, speed]` and producing
        `[actions, log_pi]`.
        For `'critic'`: a `Model` taking `[img_in, speed, actions]` and
        producing a single value `q`.
        For any other value of `actor_critic`: `None`.
    """
    from tensorflow.python.keras.models import Model
    from tensorflow.python.keras.layers import Input, Lambda, Concatenate
    img_in = Input(shape=input_shape, name='img_in')
    EPSILON = 2e-3
    if actor_critic == "actor":
        # Perception
        x = _default_model_perception(img_in)
        s_in = Input(shape=(1, ), name='speed')

        # speed layer
        s = _default_model_dense_block(s_in, 64)
        s = _default_model_dense_block(s, 64)

        # action layer
        o = Concatenate(axis=1)([x, s])
        o = _default_model_dense_block(o, 64)

        mu = Dense(num_action)(o)
        mu = Activation('tanh')(mu)
        std = Dense(num_action)(o)
        std = Activation('tanh')(std)
        # NOTE(review): `std` has just been passed through tanh, so it already
        # lies in [-1, 1] and this clip to [-20, 2] cannot change it -- confirm
        # whether the tanh activation is intended.
        log_std = Lambda(lambda x: clip_ops.clip_by_value(x, -20, 2))(std)

        std = tf.exp(log_std)
        dist = tfp.distributions.Normal(mu, std)
        actions = dist.sample()
        actions = tf.tanh(actions)

        # NOTE(review): the log-probability is evaluated at the tanh-squashed
        # actions rather than at the raw sample; the usual tanh change of
        # variables uses the pre-squash sample -- confirm.
        log_pi = dist.log_prob(actions)
        log_pi = log_pi - tf.reduce_sum(
            tf.math.log(1 - actions**2 + EPSILON), axis=1, keepdims=True)
        model = Model(inputs=[img_in, s_in], outputs=[actions, log_pi])

        # action, action_matrix, prediction from trial_run
        # reward is a function( angle, throttle)
        return model

    if actor_critic == 'critic':
        # Perception
        x = _default_model_perception(img_in)
        s_in = Input(shape=(1, ), name='speed')
        # NOTE(review): the action input width is fixed at 2, not derived
        # from `num_action` -- confirm.
        a_in = Input(shape=(2, ), name='actions')

        # speed layer
        s = _default_model_dense_block(s_in, 64)
        s = _default_model_dense_block(s, 64)

        # actions_layer
        a = _default_model_dense_block(a_in, 64)
        a = _default_model_dense_block(a, 32)

        o = Concatenate(axis=1)([x, s, a])
        o = _default_model_dense_block(o, 64)
        q = Dense(1)(o)
        model = Model(inputs=[img_in, s_in, a_in], outputs=q)

        return model

    # Unrecognised `actor_critic`: keep the historical behaviour.
    return None
def _clip_by_value_preserve_grad(x, clip_value_min, clip_value_max, name=None):
    """Clip `x` in the forward pass while keeping its gradient unclipped.

    The returned tensor is `x + stop_gradient(clip(x) - x)`: the clipping
    correction is excluded from differentiation, so only the bare `x` term
    contributes to the gradient.

    Args:
      x: Tensor to clip.
      clip_value_min: Lower bound forwarded to `clip_ops.clip_by_value`.
      clip_value_max: Upper bound forwarded to `clip_ops.clip_by_value`.
      name: Optional name scope; defaults to "clip_by_value_preserve_grad".

    Returns:
      The sum of `x` and the gradient-stopped clipping correction.
    """
    scope_values = [x, clip_value_min, clip_value_max]
    with ops.name_scope(name, "clip_by_value_preserve_grad", scope_values):
        clipped = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
        # Only this correction term is hidden from the gradient.
        correction = array_ops.stop_gradient(clipped - x)
        return x + correction
Beispiel #47
0
  def __call__(self, inputs, state, scope=None):
    """Advance the LSTM by a single time step.

    Args:
      inputs: 2D input Tensor, batch x num_units.
      state: 2D state Tensor, batch x state_size; the cell state occupies the
        first `num_units` columns and the output state the columns after it.
      scope: VariableScope for the created subgraph; defaults to the class
        name ("LSTMCell").

    Returns:
      A pair `(m, new_state)`:
      - `m`: 2D Tensor, batch x output_dim, the LSTM output for this step,
        where output_dim is num_proj if it was set and num_units otherwise.
      - `new_state`: 2D Tensor, the new cell state and output concatenated
        along axis 1.

    Raises:
      ValueError: If input size cannot be inferred from inputs via
        static shape inference.
    """
    if self._num_proj is None:
      num_proj = self._num_units
    else:
      num_proj = self._num_proj

    # Unpack the concatenated state into its cell and output halves.
    c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
    m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

    dtype = inputs.dtype
    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

    with vs.variable_scope(scope or type(self).__name__,
                           initializer=self._initializer):  # "LSTMCell"
      kernel_shape = [input_size.value + num_proj, 4 * self._num_units]
      concat_w = _get_concat_variable(
          "W", kernel_shape, dtype, self._num_unit_shards)

      b = vs.get_variable(
          "B", shape=[4 * self._num_units],
          initializer=array_ops.zeros_initializer, dtype=dtype)

      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      cell_inputs = array_ops.concat(1, [inputs, m_prev])
      lstm_matrix = nn_ops.bias_add(math_ops.matmul(cell_inputs, concat_w), b)
      i, j, f, o = array_ops.split(1, 4, lstm_matrix)

      if self._use_peepholes:
        # Diagonal (peephole) connections from the cell state to the gates.
        w_f_diag = vs.get_variable(
            "W_F_diag", shape=[self._num_units], dtype=dtype)
        w_i_diag = vs.get_variable(
            "W_I_diag", shape=[self._num_units], dtype=dtype)
        w_o_diag = vs.get_variable(
            "W_O_diag", shape=[self._num_units], dtype=dtype)
        retained = (
            sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev)
        written = sigmoid(i + w_i_diag * c_prev) * tanh(j)
      else:
        retained = sigmoid(f + self._forget_bias) * c_prev
        written = sigmoid(i) * tanh(j)
      c = retained + written

      if self._cell_clip is not None:
        # pylint: disable=invalid-unary-operand-type
        c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
        # pylint: enable=invalid-unary-operand-type

      if self._use_peepholes:
        m = sigmoid(o + w_o_diag * c) * tanh(c)
      else:
        m = sigmoid(o) * tanh(c)

      if self._num_proj is not None:
        # Optional linear projection of the output down to num_proj units.
        concat_w_proj = _get_concat_variable(
            "W_P", [self._num_units, self._num_proj],
            dtype, self._num_proj_shards)

        m = math_ops.matmul(m, concat_w_proj)

    new_state = array_ops.concat(1, [c, m])
    return m, new_state
Beispiel #48
0
def histogram_fixed_width(values,
                          value_range,
                          nbins=100,
                          dtype=dtypes.int32,
                          name=None):
    """Count how many entries of `values` fall into each equal-width bin.

    `values` is flattened, every entry is assigned to one of `nbins` bins of
    equal width spanning `value_range`, and the per-bin counts are returned
    as a rank 1 tensor.

    Args:
      values:  Numeric `Tensor`.
      value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
        values <= value_range[0] will be mapped to hist[0],
        values >= value_range[1] will be mapped to hist[-1].
      nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
      dtype:  dtype for returned histogram.
      name:  A name for this operation (defaults to 'histogram_fixed_width').

    Returns:
      A 1-D `Tensor` holding histogram of values.

    Example:

      # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
      nbins = 5
      value_range = [0.0, 5.0]
      new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
      hist = histogram_fixed_width(new_values, value_range, nbins=5)
      # Evaluating `hist` yields [2, 1, 1, 0, 2].
    """
    with ops.name_scope(name, 'histogram_fixed_width',
                        [values, value_range, nbins]) as scope:
        flat_values = array_ops.reshape(
            ops.convert_to_tensor(values, name='values'), [-1])
        value_range = ops.convert_to_tensor(value_range, name='value_range')
        nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
        nbins_float = math_ops.cast(nbins, flat_values.dtype)

        # Rescale so that entries inside value_range land in [0, 1].
        offset = flat_values - value_range[0]
        span = value_range[1] - value_range[0]
        unit_position = math_ops.truediv(offset, span, name='scaled_values')

        # In-range entries now map to {0, ..., nbins-1}; entries outside the
        # range map to negative numbers or to nbins and above.
        raw_bins = math_ops.floor(nbins_float * unit_position, name='indices')

        # Pull the edge case value == value_range[1] and all outliers back
        # into the first/last bin.
        last_bin = nbins_float - 1
        bin_ids = math_ops.cast(
            clip_ops.clip_by_value(raw_bins, 0, last_bin), dtypes.int32)

        # TODO(langmore) This creates an array of ones to add up and place in the
        # bins.  This is inefficient, so replace when a better Op is available.
        unit_counts = array_ops.ones_like(bin_ids, dtype=dtype)
        return math_ops.unsorted_segment_sum(unit_counts,
                                             bin_ids,
                                             nbins,
                                             name=scope)
Beispiel #49
0
 def _log_prob(self, x):
     """Return the log-density evaluated at `x`.

     Computes `log(alpha) + alpha * log(sigma) - (alpha + 1) * log(x)` after
     validating `x` and flooring it at 1e-6.

     Args:
       x: Sample tensor; passed through `self._assert_valid_sample` first.

     Returns:
       Tensor of log-density values, one per entry of `x`.
     """
     x = self._assert_valid_sample(x, check_integer=False)
     # Floor x so the logarithm below never sees zero or a negative value.
     # The previous `clip_by_value(x, 1e-6, x)` used x as its own upper bound,
     # which makes min > max whenever x < 1e-6 -- a case clip_by_value does
     # not define. An explicit maximum states the intended lower bound.
     x = math_ops.maximum(x, 1e-6)
     return math_ops.log(self.alpha) + self.alpha * math_ops.log(
         self.sigma) - (self.alpha + 1) * math_ops.log(x)
Beispiel #50
0
def _StatelessParameterizedTruncatedNormalGrad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a TruncatedNormal sample w.r.t. parameters.

  The gradient is computed using implicit differentiation
  (Figurnov et al., 2018).

  Args:
    op: A `StatelessParameterizedTruncatedNormal` operation. We assume that the
      inputs to the operation are `shape`, `seed`, `mean`, `stddev`, `minval`,
      and `maxval` tensors, and the output is the `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A tuple `(None, None, grad_mean, grad_stddev, grad_minval, grad_maxval)`
    with one entry per op input. The two leading `None` entries correspond to
    `shape` and `seed`; the remaining entries are `Tensor`s holding the
    derivatives with respect to each parameter, reshaped to that parameter's
    own shape.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  # op.inputs[1] (the seed) is deliberately not read.
  shape = op.inputs[0]
  mean = op.inputs[2]
  stddev = op.inputs[3]
  minval = op.inputs[4]
  maxval = op.inputs[5]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    # Express the truncation bounds and the sample in units of stddev
    # around the mean.
    minval_std = (minval - mean) / stddev
    maxval_std = (maxval - mean) / stddev
    sample_std = (sample - mean) / stddev

    # Position of the sample between the two bounds, measured with _Ndtr
    # (presumably the standard normal CDF -- confirm against its definition).
    cdf_sample = (_Ndtr(sample_std) - _Ndtr(minval_std)) / (
        _Ndtr(maxval_std) - _Ndtr(minval_std))

    # Clip to avoid zero argument for log_cdf expression. The upper bound of
    # 1 - eps likewise keeps log1p(-cdf_sample) below finite.
    tiny = np.finfo(mean.dtype.as_numpy_dtype).tiny
    eps = np.finfo(mean.dtype.as_numpy_dtype).eps
    cdf_sample = clip_ops.clip_by_value(cdf_sample, tiny, 1 - eps)

    # Element-wise factors that multiply the incoming gradient for each
    # parameter.
    dmaxval = math_ops.exp(0.5 * (sample_std ** 2 - maxval_std ** 2) +
                           math_ops.log(cdf_sample))
    dminval = math_ops.exp(0.5 * (sample_std ** 2 - minval_std ** 2) +
                           math_ops.log1p(-cdf_sample))
    dmean = array_ops.ones_like(sample_std)
    dstddev = sample_std

    # Reduce over extra dimensions caused by `shape`. We need to get the
    # difference in rank from shape vs. the broadcasted rank.

    mean_shape = array_ops.shape(mean)
    stddev_shape = array_ops.shape(stddev)
    minval_shape = array_ops.shape(minval)
    maxval_shape = array_ops.shape(maxval)

    # Fold the four parameter shapes into their common broadcast shape.
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        mean_shape, stddev_shape)
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        minval_shape, broadcast_shape)
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        maxval_shape, broadcast_shape)
    # Leading axes present in `shape` but absent from the broadcast shape.
    extra_dims = math_ops.range(
        array_ops.size(shape) - array_ops.size(broadcast_shape))

    grad_mean = math_ops.reduce_sum(grad * dmean, axis=extra_dims)
    grad_stddev = math_ops.reduce_sum(grad * dstddev, axis=extra_dims)
    grad_minval = math_ops.reduce_sum(grad * dminval, axis=extra_dims)
    grad_maxval = math_ops.reduce_sum(grad * dmaxval, axis=extra_dims)

    # For each parameter, get the axes to sum over when going from the
    # broadcast shape back to the parameter's own shape.
    _, rmean = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, mean_shape)
    _, rstddev = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, stddev_shape)
    _, rminval = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, minval_shape)
    _, rmaxval = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, maxval_shape)

    grad_mean = array_ops.reshape(
        math_ops.reduce_sum(grad_mean, axis=rmean, keepdims=True), mean_shape)

    grad_stddev = array_ops.reshape(
        math_ops.reduce_sum(grad_stddev, axis=rstddev, keepdims=True),
        stddev_shape)

    grad_minval = array_ops.reshape(
        math_ops.reduce_sum(grad_minval, axis=rminval, keepdims=True),
        minval_shape)

    grad_maxval = array_ops.reshape(
        math_ops.reduce_sum(grad_maxval, axis=rmaxval, keepdims=True),
        maxval_shape)

    # The first two inputs are `shape` and `seed`; neither gets a gradient.
    return (None, None, grad_mean, grad_stddev, grad_minval, grad_maxval)
    def call(self, inputs, state):
        """Run one step of the phased LSTM.

        The last column of `inputs` is sliced off and used as the time value
        that drives the time ("kronos") gate; the remaining columns are fed
        to the regular LSTM computation.

        Args:
          inputs: input Tensor, 2D, batch x (features + 1), where the final
            column holds the time value.
          state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, batch x state_size`.  If `state_is_tuple` is True, this must be a
            tuple of state Tensors, both `2-D`, with column sizes `c_state` and
            `m_state`.

        Returns:
          A tuple containing:

          - A `2-D, [batch x output_dim]`, Tensor representing the output of the
            LSTM after reading `inputs` when previous state was `state`.
            Here output_dim is:
               num_proj if num_proj was set,
               num_units otherwise.
          - Tensor(s) representing the new state of LSTM after reading `inputs` when
            the previous state was `state`.  Same type and shape(s) as `state`.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
        """
        num_proj = self._num_units if self._num_proj is None else self._num_proj

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            # Concatenated state: cell state first, output state after it.
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]

        if input_size.value is None:
            raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

        # --------------------------------------- #
        # ------------- PHASED LSTM ------------- #
        # ---------------- BEGIN ---------------- #
        # --------------------------------------- #

        # Split the input into the feature columns and the trailing time column.
        i_size = input_size.value - 1  # -1 to extract time
        times = array_ops.slice(inputs, [0, i_size], [-1, 1])
        filtered_inputs = array_ops.slice(inputs, [0, 0], [-1, i_size])

        # Per-unit gate parameters: tau (divisor of the modulo below), r_on
        # (threshold compared against the phase) and s (offset subtracted
        # from the time). When `manual_set` is true, tau and s start from
        # constants instead of random draws.
        tau = vs.get_variable(
            "T", shape=[self._num_units],
            initializer=random_exp_initializer(0, self.tau_init) if not self.manual_set else init_ops.constant_initializer(self.tau_init),
            trainable=self.trainable, dtype=dtype)

        r_on = vs.get_variable(
            "R", shape=[self._num_units],
            initializer=init_ops.constant_initializer(self.r_on_init),
            trainable=self.trainable, dtype=dtype)

        # The random initial offset is drawn uniformly between 0 and tau's
        # initial value.
        s = vs.get_variable(
            "S", shape=[self._num_units],
            initializer=init_ops.random_uniform_initializer(0., tau.initialized_value()) if not self.manual_set else init_ops.constant_initializer(0.),
            trainable=self.trainable, dtype=dtype)

        # Add a leading axis of size 1 so the per-unit parameters broadcast
        # against the batch axis.
        tau_broadcast = tf.expand_dims(tau, axis=0)
        r_on_broadcast = tf.expand_dims(r_on, axis=0)
        s_broadcast = tf.expand_dims(s, axis=0)

        # Use absolute values of r_on and tau in the gate computation.
        r_on_broadcast = tf.abs(r_on_broadcast)
        tau_broadcast = tf.abs(tau_broadcast)
        # Repeat the time column once per unit.
        times = tf.tile(times, [1, self._num_units])

        # calculate kronos gate
        # phi: offset time reduced modulo tau, then divided by tau. The extra
        # "+ tau, mod tau" is presumably there to keep the remainder
        # non-negative -- confirm against tf.mod semantics.
        phi = tf.div(tf.mod(tf.mod(times - s_broadcast, tau_broadcast) + tau_broadcast, tau_broadcast), tau_broadcast)
        # is_up: phi below r_on / 2; is_down: phi in [r_on / 2, r_on).
        is_up = tf.less(phi, (r_on_broadcast * 0.5))
        is_down = tf.logical_and(tf.less(phi, r_on_broadcast), tf.logical_not(is_up))

        # Gate value: phi / (r_on / 2) where is_up, 2 - 2 * phi / r_on where
        # is_down, and alpha * phi everywhere else.
        k = tf.where(is_up, phi / (r_on_broadcast * 0.5), tf.where(is_down, 2. - 2. * (phi / r_on_broadcast), self.alpha * phi))

        # Standard LSTM pre-activations, computed from the inputs without
        # the time column.
        lstm_matrix = math_ops.matmul(array_ops.concat([filtered_inputs, m_prev], 1), self._kernel)
        lstm_matrix = nn_ops.bias_add(lstm_matrix, self._bias)

        # --------------------------------------- #
        # ------------- PHASED LSTM ------------- #
        # ----------------- END ----------------- #
        # --------------------------------------- #

        # Split into four equal parts along axis 1, one per gate.
        i, j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=4, axis=1)

        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
                 sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * self._activation(j))

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type

        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:

            m = math_ops.matmul(m, self._proj_kernel)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        # APPLY KRONOS GATE
        # Blend the candidate state with the previous state using k as the
        # interpolation weight.
        c = k * c + (1. - k) * c_prev
        m = k * m + (1. - k) * m_prev
        # END KRONOS GATE

        new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else array_ops.concat([c, m], 1))
        return m, new_state
Beispiel #52
0
    def call(self, inputs, state):
        """Run one LSTM step and mix the result with the previous state.

        After the usual LSTM update, the new cell state and the new output
        are each combined element-wise with their previous values, using
        masks produced by `dropout` from tensors of ones.

        Args:
          inputs: input Tensor, 2D, `[batch, num_units]`.
          state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, [batch, state_size]`.  If `state_is_tuple` is True, this must be a
            tuple of state Tensors, both `2-D`, with column sizes `c_state` and
            `m_state`.

        Returns:
          A tuple containing:

          - A `2-D, [batch, output_dim]`, Tensor representing the output of the
            LSTM after reading `inputs` when previous state was `state`.
            Here output_dim is:
               num_proj if num_proj was set,
               num_units otherwise.
          - Tensor(s) representing the new state of LSTM after reading `inputs` when
            the previous state was `state`.  Same type and shape(s) as `state`.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
        """
        if self._num_proj is None:
            num_proj = self._num_units
        else:
            num_proj = self._num_proj
        sigmoid = math_ops.sigmoid

        if self._state_is_tuple:
            c_prev, m_prev = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        cell_input = array_ops.concat([inputs, m_prev], 1)
        gate_logits = nn_ops.bias_add(
            math_ops.matmul(cell_input, self._kernel), self._bias)
        i, j, f, o = array_ops.split(
            value=gate_logits, num_or_size_splits=4, axis=1)

        # Masks choosing between the fresh value and the previous one.
        # NOTE(review): if `dropout` rescales the surviving entries (as
        # inverted dropout does), these masks are not strictly 0/1 -- confirm
        # against the definition of `dropout`.
        update_cell = dropout(
            array_ops.ones_like(c_prev),
            rate=self.cell_zoneout_rate,
            training=self.is_training)
        keep_cell = array_ops.ones_like(update_cell) - update_cell
        update_output = dropout(
            array_ops.ones_like(m_prev),
            rate=self.output_zoneout_rate,
            training=self.is_training)
        keep_output = array_ops.ones_like(update_output) - update_output

        if self._use_peepholes:
            # Diagonal (peephole) connections from the cell state.
            retained = (
                sigmoid(f + self._forget_bias + self._w_f_diag * c_prev)
                * c_prev)
            written = (
                sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
        else:
            retained = sigmoid(f + self._forget_bias) * c_prev
            written = sigmoid(i) * self._activation(j)
        c = retained + written

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type

        c = update_cell * c + keep_cell * c_prev

        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            m = math_ops.matmul(m, self._proj_kernel)

        # Unlike the matmul above, this clip does not depend on num_proj
        # being set.
        if self._proj_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
            # pylint: enable=invalid-unary-operand-type

        m = update_output * m + keep_output * m_prev

        if self._state_is_tuple:
            new_state = LSTMStateTuple(c, m)
        else:
            new_state = array_ops.concat([c, m], 1)

        return m, new_state
Beispiel #53
0
 def hard_sigmoid(x):  # pylint: disable=redefined-outer-name
     """Clip `x` to [0, 1] and pass the result to `built_activation`.

     `name` and `collect` are not parameters here; they are presumably
     captured from the enclosing scope -- confirm against the outer function.
     """
     with get_name_scope(name=name):
         clipped = clip_ops.clip_by_value(
             x, clip_value_min=0., clip_value_max=1.)
         return built_activation(clipped, collect)
Beispiel #54
0
def mean_absolute_percentage_error(labels,
                                   predictions,
                                   weights=None,
                                   metrics_collections=None,
                                   updates_collections=None,
                                   name=None):
    """Computes the mean absolute percentage error between the labels and predictions.

    The `mean_absolute_percentage_error` function creates two local variables,
    `total` and `count` that are used to compute the mean absolute percentage
    error. This average is weighted by `weights`, and it is ultimately returned
    as `mean_absolute_percentage_error`: an idempotent operation that simply
    divides `total` by `count`.

    For estimation of the metric over a stream of data, the function creates an
    `update_op` operation that updates these variables and returns the
    `mean_absolute_percentage_error`. Internally, an
    `absolute_percentage_errors` operation computes
    `100 * |predictions - labels| / max(|labels|, EPSILON)`. Then `update_op`
    increments `total` with the reduced sum of the product of `weights` and
    `absolute_percentage_errors`, and it increments `count` with the reduced
    sum of `weights`.

    If `weights` is `None`, weights default to 1. Use weights of 0 to mask
    values.

    Before the error is computed, `labels` and `predictions` are brought to a
    common dtype: if exactly one side's dtype is float16/float32/float64 (or
    both are but differ), the other side is cast to it (preferring
    `predictions`' dtype); otherwise both are cast to float32.

    Args:
      labels: A `Tensor` of the same shape as `predictions`.
      predictions: A `Tensor` of arbitrary shape.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `labels`, and must be broadcastable to `labels` (i.e., all dimensions
        must be either `1`, or the same as the corresponding `labels`
        dimension).
      metrics_collections: An optional list of collections that
        `mean_absolute_percentage_error` should be added to.
      updates_collections: An optional list of collections that `update_op`
        should be added to.
      name: An optional variable_scope name; defaults to 'mape'.

    Returns:
      mean_absolute_percentage_error: A `Tensor` representing the current mean,
        the value of `total` divided by `count`.
      update_op: An operation that increments the `total` and `count` variables
        appropriately and whose value matches `mean_absolute_percentage_error`.

    Raises:
      ValueError: If `predictions` and `labels` have mismatched shapes, or if
        `weights` is not `None` and its shape doesn't match `predictions`, or
        if either `metrics_collections` or `updates_collections` are not a list
        or tuple.
      RuntimeError: If eager execution is enabled.
    """
    if context.executing_eagerly():
        raise RuntimeError(
            'tf.metrics.mean_absolute_percentage_error is not supported '
            'when eager execution is enabled.')

    float_types = (dtypes.float16, dtypes.float32, dtypes.float64)
    dtypes_differ = labels.dtype != predictions.dtype
    if predictions.dtype in float_types and dtypes_differ:
        labels = math_ops.cast(labels, predictions.dtype)
    elif labels.dtype in float_types and dtypes_differ:
        predictions = math_ops.cast(predictions, labels.dtype)
    else:
        labels = math_ops.cast(labels, dtypes.float32)
        predictions = math_ops.cast(predictions, dtypes.float32)

    predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(
        predictions=predictions, labels=labels, weights=weights)

    # Build the clip bounds in the dtype the operands ended up with. They
    # were previously fixed to float32, which clashed with float16/float64
    # operands produced by the casts above.
    min_value = constant_op.constant(EPSILON, dtype=labels.dtype)
    max_value = constant_op.constant(float('Inf'), dtype=labels.dtype)
    # Floor |labels| at EPSILON so the division never hits zero.
    denominator = clip_ops.clip_by_value(
        math_ops.abs(labels), min_value, max_value)
    # The outer abs makes the sign of the denominator irrelevant, so no
    # second abs around it is needed.
    percentage_absolute_errors = 100 * math_ops.abs(
        (predictions - labels) / denominator)
    return metrics_impl.mean(percentage_absolute_errors, weights,
                             metrics_collections, updates_collections,
                             name or 'mape')
Beispiel #55
0
def histogram_fixed_width_bins(values,
                               value_range,
                               nbins=100,
                               dtype=dtypes.int32,
                               name=None):
  """Returns, for every entry of `values`, the index of its histogram bin.

  The bins are `nbins` intervals of equal width spanning `value_range`.
  Entries outside the range are assigned to the first or last bin. The result
  has the same shape as `values` and is always cast to `int32`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
      values <= value_range[0] will be mapped to hist[0],
      values >= value_range[1] will be mapped to hist[-1].
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  Accepted but not read by this function; the returned indices are
      `int32` regardless of its value.
    name:  A name for this operation (defaults to
      'histogram_fixed_width_bins').

  Returns:
    A `Tensor` holding the indices of the binned values whose shape matches
    `values`.

  Example:

    # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
    nbins = 5
    value_range = [0.0, 5.0]
    new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
    indices = histogram_fixed_width_bins(new_values, value_range, nbins=5)
    # Evaluating `indices` yields [0, 0, 1, 2, 4, 4].
  """
  with ops.name_scope(name, 'histogram_fixed_width_bins',
                      [values, value_range, nbins]):
    values = ops.convert_to_tensor(values, name='values')
    # Remember the incoming shape so the flat result can be restored to it.
    original_shape = array_ops.shape(values)

    flat_values = array_ops.reshape(values, [-1])
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
    nbins_float = math_ops.cast(nbins, flat_values.dtype)

    # Rescale so that entries inside value_range land in [0, 1].
    offset = flat_values - value_range[0]
    span = value_range[1] - value_range[0]
    unit_position = math_ops.truediv(offset, span, name='scaled_values')

    # In-range entries now map to {0, ..., nbins-1}; entries outside the range
    # map to negative numbers or to nbins and above.
    raw_bins = math_ops.floor(nbins_float * unit_position, name='indices')

    # Pull the edge case value == value_range[1] and all outliers back into
    # the first/last bin.
    last_bin = nbins_float - 1
    bin_ids = math_ops.cast(
        clip_ops.clip_by_value(raw_bins, 0, last_bin), dtypes.int32)
    return array_ops.reshape(bin_ids, original_shape)
Beispiel #56
0
    def __call__(self, inputs, state, scope=None):
        """Advance the LSTM by a single time step.

        Args:
          inputs: input Tensor, 2D, batch x num_units.
          state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, batch x state_size`.  If `state_is_tuple` is True, this
            must be a tuple of state Tensors, both `2-D`, with column sizes
            `c_state` and `m_state`.
          scope: VariableScope for the created subgraph; defaults to
            "lstm_cell".

        Returns:
          A tuple containing:

          - A `2-D, [batch x output_dim]`, Tensor representing the output of
            the LSTM after reading `inputs` when previous state was `state`.
            Here output_dim is:
               num_proj if num_proj was set,
               num_units otherwise.
          - Tensor(s) representing the new state of LSTM after reading
            `inputs` when the previous state was `state`.  Same type and
            shape(s) as `state`.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
        """
        if self._num_proj is None:
            num_proj = self._num_units
        else:
            num_proj = self._num_proj

        if self._state_is_tuple:
            c_prev, m_prev = state
        else:
            # Concatenated state: cell state first, output state after it.
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(
                state, [0, self._num_units], [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        with vs.variable_scope(scope or "lstm_cell",
                               initializer=self._initializer) as cell_scope:
            if self._num_unit_shards is not None:
                unit_partitioner = (
                    partitioned_variables.fixed_size_partitioner(
                        self._num_unit_shards))
                cell_scope.set_partitioner(unit_partitioner)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            lstm_matrix = _linear(
                [inputs, m_prev], 4 * self._num_units, bias=True, scope=scope)
            i, j, f, o = array_ops.split(
                value=lstm_matrix, num_or_size_splits=4, axis=1)

            if self._use_peepholes:
                # Diagonal (peephole) weights are created with the
                # partitioner switched off.
                with vs.variable_scope(cell_scope) as peephole_scope:
                    if self._num_unit_shards is not None:
                        peephole_scope.set_partitioner(None)
                    w_f_diag = vs.get_variable(
                        "w_f_diag", shape=[self._num_units], dtype=dtype)
                    w_i_diag = vs.get_variable(
                        "w_i_diag", shape=[self._num_units], dtype=dtype)
                    w_o_diag = vs.get_variable(
                        "w_o_diag", shape=[self._num_units], dtype=dtype)
                retained = (
                    sigmoid(f + self._forget_bias + w_f_diag * c_prev)
                    * c_prev)
                written = (
                    sigmoid(i + w_i_diag * c_prev) * self._activation(j))
            else:
                retained = sigmoid(f + self._forget_bias) * c_prev
                written = sigmoid(i) * self._activation(j)
            c = retained + written

            if self._cell_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                c = clip_ops.clip_by_value(
                    c, -self._cell_clip, self._cell_clip)
                # pylint: enable=invalid-unary-operand-type

            if self._use_peepholes:
                m = sigmoid(o + w_o_diag * c) * self._activation(c)
            else:
                m = sigmoid(o) * self._activation(c)

            if self._num_proj is not None:
                with vs.variable_scope("projection") as output_proj_scope:
                    if self._num_proj_shards is not None:
                        proj_partitioner = (
                            partitioned_variables.fixed_size_partitioner(
                                self._num_proj_shards))
                        output_proj_scope.set_partitioner(proj_partitioner)
                    m = _linear(m, self._num_proj, bias=False, scope=scope)

                if self._proj_clip is not None:
                    # pylint: disable=invalid-unary-operand-type
                    m = clip_ops.clip_by_value(
                        m, -self._proj_clip, self._proj_clip)
                    # pylint: enable=invalid-unary-operand-type

        if self._state_is_tuple:
            new_state = LSTMStateTuple(c, m)
        else:
            new_state = array_ops.concat_v2([c, m], 1)
        return m, new_state
Beispiel #57
0
    def __call__(self, input_, state, scope=None):
        """Advance the LSTM by one time step.

        Args:
          input_: 2D input Tensor, batch x input_size.
          state: 2D state Tensor, batch x state_size.  Its first `num_units`
            columns are read as the previous cell state and the following
            columns (`num_proj` of them when a projection is configured,
            `num_units` otherwise) as the previous output.
          scope: VariableScope for the created subgraph; the class name is
            used when this is falsy.

        Returns:
          A tuple `(m, new_state)`:
          - `m`: 2D output Tensor with `num_proj` columns if a projection is
            configured and `num_units` columns otherwise.
          - `new_state`: the new cell state and `m` concatenated along
            axis 1.
        """
        if self._num_proj is None:
            output_size = self._num_units
        else:
            output_size = self._num_proj

        c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
        m_prev = array_ops.slice(state, [0, self._num_units],
                                 [-1, output_size])

        dtype = input_.dtype
        shard_width = (4 * self._num_units) // self._num_unit_shards

        with vs.variable_scope(scope or type(self).__name__):  # "LSTMCell"
            # The gate weight matrix is stored as column shards that are
            # joined along axis 1.
            weight_shards = []
            for shard in xrange(self._num_unit_shards):
                weight_shards.append(
                    vs.get_variable(
                        "W_%d" % shard,
                        shape=[self.input_size + output_size, shard_width],
                        initializer=self._initializer,
                        dtype=dtype))
            weights = array_ops.concat(1, weight_shards)

            bias = vs.get_variable("B",
                                   shape=[4 * self._num_units],
                                   initializer=array_ops.zeros_initializer,
                                   dtype=dtype)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            cell_inputs = array_ops.concat(1, [input_, m_prev])
            gate_inputs = nn_ops.bias_add(
                math_ops.matmul(cell_inputs, weights), bias)
            i, j, f, o = array_ops.split(1, 4, gate_inputs)

            # The constant 1 added to f below shifts the forget gate's
            # pre-activation before the sigmoid.
            if self._use_peepholes:
                # Diagonal (peephole) connections from the cell state.
                w_f_diag = vs.get_variable("W_F_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_i_diag = vs.get_variable("W_I_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_o_diag = vs.get_variable("W_O_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                retained = sigmoid(f + 1 + w_f_diag * c_prev) * c_prev
                admitted = sigmoid(i + w_i_diag * c_prev) * tanh(j)
            else:
                retained = sigmoid(f + 1) * c_prev
                admitted = sigmoid(i) * tanh(j)
            c = retained + admitted

            cell_clip = self._cell_clip
            if cell_clip is not None:
                c = clip_ops.clip_by_value(c, -cell_clip, cell_clip)

            if self._use_peepholes:
                output_gate = sigmoid(o + w_o_diag * c)
            else:
                output_gate = sigmoid(o)
            m = output_gate * tanh(c)

            if self._num_proj is not None:
                # The projection matrix is sharded by columns as well.
                proj_width = self._num_proj // self._num_proj_shards
                proj_shards = []
                for shard in xrange(self._num_proj_shards):
                    proj_shards.append(
                        vs.get_variable("W_P_%d" % shard,
                                        shape=[self._num_units, proj_width],
                                        initializer=self._initializer,
                                        dtype=dtype))
                w_proj = array_ops.concat(1, proj_shards)
                # TODO(ebrevdo), use matmulsum
                m = math_ops.matmul(m, w_proj)

        new_state = array_ops.concat(1, [c, m])
        return m, new_state
Beispiel #58
0
    def call(self, inputs, state):
        """Run one LSTM step, optionally with a highway connection.

        Args:
          inputs: input Tensor; its static shape must have rank 2 with a
            known second dimension.
          state: pair `(c_prev, m_prev)` holding the previous cell state and
            the previous output.

        Returns:
          A tuple `(m, new_state)` where `m` is the new output and
          `new_state` is `LSTMStateTuple(c, m)`.

        Raises:
          ValueError: if the second dimension of `inputs` is not statically
            known.
        """
        sigmoid = math_ops.sigmoid

        def _maybe_normalize(tensor):
            # Layer normalization is applied only when the cell enables it.
            if self.use_layer_norm:
                return layer_norm(tensor)
            return tensor

        def _maybe_clip(cell):
            # Bound the cell state to [-cell_clip, cell_clip] if configured.
            if self._cell_clip is None:
                return cell
            return clip_ops.clip_by_value(cell, -self._cell_clip,
                                          self._cell_clip)

        c_prev, m_prev = state

        if inputs.get_shape().with_rank(2)[1].value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate,
        # r = transform gate
        from_inputs = _maybe_normalize(
            math_ops.matmul(inputs, self._input_kernel))
        from_hidden = _maybe_normalize(
            math_ops.matmul(m_prev, self._hidden_kernel))
        # The bias is added to the input projection only, after the optional
        # normalization.
        from_inputs = nn_ops.bias_add(from_inputs, self._bias)

        if not self._highway:
            i, j, f, o = array_ops.split(value=from_inputs + from_hidden,
                                         num_or_size_splits=4,
                                         axis=1)
            input_gate = sigmoid(i)
            output_gate = sigmoid(o)
            forget_gate = sigmoid(f + self._forget_bias)
            c = _maybe_clip(input_gate * self._activation(j) +
                            forget_gate * c_prev)
            m = output_gate * self._activation(_maybe_normalize(c))
            return m, LSTMStateTuple(c, m)

        # Highway variant: the last `num_units` columns of the input
        # projection bypass the gates and are mixed into the output below.
        gate_inputs = from_hidden + from_inputs[:, :-self._num_units]
        i, j, f, o, r = array_ops.split(gate_inputs,
                                        num_or_size_splits=5,
                                        axis=1)
        carried_input = from_inputs[:, -self._num_units:]

        input_gate = sigmoid(i)
        output_gate = sigmoid(o)
        forget_gate = sigmoid(f + self._forget_bias)
        candidate = self._activation(j)
        c = _maybe_clip(forget_gate * c_prev + input_gate * candidate)

        transform_gate = sigmoid(r)
        lstm_output = output_gate * self._activation(_maybe_normalize(c))
        m = (transform_gate * lstm_output +
             (1 - transform_gate) * carried_input)
        return m, LSTMStateTuple(c, m)
Beispiel #59
0
    def __call__(self, inputs, state, scope=None):
        """Advance the grid LSTM by one time step.

        The frequency blocks returned by `_make_tf_features` are processed in
        order.  Every block updates a frequency-direction LSTM (F-LSTM), whose
        state is carried from one block to the next starting at zero, and a
        time-direction LSTM (T-LSTM), whose previous state is read from
        `state`.

        Args:
          inputs: input Tensor (documented upstream as 2D, batch x
            num_units).  Its first dimension must be statically known
            because it sizes the initial zero F-LSTM state.
          state: previous T-LSTM state.  When `_state_is_tuple` is set it
            must expose attributes `state_fNN_c` / `state_fNN_m` for every
            block index NN; otherwise it is a 2D Tensor holding, per block,
            `num_units` columns of cell state followed by `num_units`
            columns of output.
          scope: VariableScope for the created subgraph; the class name is
            used when this is falsy.

        Returns:
          A tuple `(m_out, state_out)`:
          - `m_out`: for every block the T-LSTM output followed by the
            F-LSTM output, all concatenated along axis 1.
          - `state_out`: the per-block `(c_time, m_time)` values, packed with
            `_state_tuple_type` when `_state_is_tuple` is set and
            concatenated along axis 1 otherwise.

        NOTE(review): the upstream docstring lists a ValueError for an
        input-size mismatch; no such check is visible in this method, so it
        is presumably raised by `_make_tf_features` - confirm.
        """
        sigmoid = math_ops.sigmoid
        tanh = math_ops.tanh
        coupled = self._couple_input_forget_gates
        num_gates = 3 if coupled else 4

        freq_inputs = self._make_tf_features(inputs)
        dtype = inputs.dtype
        actual_input_size = freq_inputs[0].get_shape().as_list()[1]
        shared = self._share_time_frequency_weights
        peepholes = self._use_peepholes

        def _gate_weights(name):
            # Weights applied to the concatenated [input, m_time, m_freq].
            return _get_concat_variable(name, [
                actual_input_size + 2 * self._num_units,
                num_gates * self._num_units
            ], dtype, self._num_unit_shards)

        def _gate_bias(name):
            return vs.get_variable(name,
                                   shape=[num_gates * self._num_units],
                                   initializer=array_ops.zeros_initializer,
                                   dtype=dtype)

        def _diag(name):
            # One peephole (diagonal) weight vector.
            return vs.get_variable(name,
                                   shape=[self._num_units],
                                   dtype=dtype)

        def _split_gates(lstm_matrix):
            # Returns (i, j, f, o); f is None when the forget gate is coupled
            # to the input gate and therefore has no pre-activation.
            pieces = array_ops.split(1, num_gates, lstm_matrix)
            if coupled:
                in_part, new_part, out_part = pieces
                return in_part, new_part, None, out_part
            in_part, new_part, forget_part, out_part = pieces
            return in_part, new_part, forget_part, out_part

        def _gate(pre_activation, peephole, cell_freq, cell_time):
            # Sigmoid gate; with peepholes enabled the frequency and time
            # cell states are added in through their diagonal weights.
            if peepholes:
                w_freq, w_time = peephole
                pre_activation = (pre_activation + w_freq * cell_freq +
                                  w_time * cell_time)
            return sigmoid(pre_activation)

        def _clip(cell):
            if self._cell_clip is None:
                return cell
            # pylint: disable=invalid-unary-operand-type
            return clip_ops.clip_by_value(cell, -self._cell_clip,
                                          self._cell_clip)
            # pylint: enable=invalid-unary-operand-type

        with vs.variable_scope(
                scope or type(self).__name__,
                initializer=self._initializer):  # "GridLSTMCell"
            concat_w_f = _gate_weights("W_f")
            b_f = _gate_bias("B_f")
            if not shared:
                concat_w_t = _gate_weights("W_t")
                b_t = _gate_bias("B_t")

            # Each peephole entry is a (weight on c_freq, weight on c_time)
            # pair; entries stay None when they are never read.
            peep_f_freq = peep_i_freq = peep_o_freq = None
            peep_f_time = peep_i_time = peep_o_time = None
            if peepholes:
                # Diagonal connections
                if not coupled:
                    peep_f_freq = (_diag("W_F_diag_freqf"),
                                   _diag("W_F_diag_freqt"))
                peep_i_freq = (_diag("W_I_diag_freqf"),
                               _diag("W_I_diag_freqt"))
                peep_o_freq = (_diag("W_O_diag_freqf"),
                               _diag("W_O_diag_freqt"))
                if shared:
                    # The T-LSTM reuses the F-LSTM peephole weights.
                    peep_f_time = peep_f_freq
                    peep_i_time = peep_i_freq
                    peep_o_time = peep_o_freq
                else:
                    if not coupled:
                        peep_f_time = (_diag("W_F_diag_timef"),
                                       _diag("W_F_diag_timet"))
                    peep_i_time = (_diag("W_I_diag_timef"),
                                   _diag("W_I_diag_timet"))
                    peep_o_time = (_diag("W_O_diag_timef"),
                                   _diag("W_O_diag_timet"))

            # initialize the first freq state to be zero
            batch_size = int(inputs.get_shape()[0])
            m_prev_freq = array_ops.zeros([batch_size, self._num_units],
                                          dtype)
            c_prev_freq = array_ops.zeros([batch_size, self._num_units],
                                          dtype)

            state_out_lst = []
            m_out_lst = []
            for freq_index, freq_input in enumerate(freq_inputs):
                if self._state_is_tuple:
                    name_prefix = "state_f%02d" % freq_index
                    c_prev_time = getattr(state, name_prefix + "_c")
                    m_prev_time = getattr(state, name_prefix + "_m")
                else:
                    c_prev_time = array_ops.slice(
                        state, [0, 2 * freq_index * self._num_units],
                        [-1, self._num_units])
                    m_prev_time = array_ops.slice(
                        state, [0, (2 * freq_index + 1) * self._num_units],
                        [-1, self._num_units])

                # i = input_gate, j = new_input, f = forget_gate,
                # o = output_gate
                cell_inputs = array_ops.concat(
                    1, [freq_input, m_prev_time, m_prev_freq])

                # F-LSTM gate pre-activations.
                i_freq, j_freq, f_freq, o_freq = _split_gates(
                    nn_ops.bias_add(
                        math_ops.matmul(cell_inputs, concat_w_f), b_f))
                # T-LSTM gate pre-activations.
                if shared:
                    i_time, j_time, f_time, o_time = (i_freq, j_freq,
                                                      f_freq, o_freq)
                else:
                    i_time, j_time, f_time, o_time = _split_gates(
                        nn_ops.bias_add(
                            math_ops.matmul(cell_inputs, concat_w_t), b_t))

                # F-LSTM c_freq
                i_freq_g = _gate(i_freq, peep_i_freq, c_prev_freq,
                                 c_prev_time)
                if coupled:
                    f_freq_g = 1.0 - i_freq_g
                else:
                    f_freq_g = _gate(f_freq + self._forget_bias,
                                     peep_f_freq, c_prev_freq, c_prev_time)
                c_freq = _clip(f_freq_g * c_prev_freq +
                               i_freq_g * tanh(j_freq))

                # T-LSTM c_time
                i_time_g = _gate(i_time, peep_i_time, c_prev_freq,
                                 c_prev_time)
                if coupled:
                    f_time_g = 1.0 - i_time_g
                else:
                    f_time_g = _gate(f_time + self._forget_bias,
                                     peep_f_time, c_prev_freq, c_prev_time)
                c_time = _clip(f_time_g * c_prev_time +
                               i_time_g * tanh(j_time))

                # F-LSTM m_freq
                m_freq = _gate(o_freq, peep_o_freq, c_freq,
                               c_time) * tanh(c_freq)
                # T-LSTM m_time
                m_time = _gate(o_time, peep_o_time, c_freq,
                               c_time) * tanh(c_time)

                # The F-LSTM state feeds the next frequency block.
                m_prev_freq = m_freq
                c_prev_freq = c_freq
                # Collect the outputs for T-LSTM and F-LSTM for each shift
                state_out_lst.extend([c_time, m_time])
                m_out_lst.extend([m_time, m_freq])

            if self._state_is_tuple:
                state_out = self._state_tuple_type(*state_out_lst)
            else:
                state_out = array_ops.concat(1, state_out_lst)
            # Outputs are always concatenated as they are never used
            # separately.
            m_out = array_ops.concat(1, m_out_lst)
        return m_out, state_out
Beispiel #60
0
    def call(self, inputs, state):
        """Run one LSTM step, tagging tensors through the TfLite wrapper.

        Args:
          inputs: input Tensor, 2D, `[batch, num_units]`.  Its dtype must
            equal that of `input_to_input_w`.
          state: if `state_is_tuple` is False, a single 2-D state Tensor whose
            first `num_units` columns are the cell state and whose remaining
            columns are the previous output.  If `state_is_tuple` is True, a
            pair `(c_prev, m_prev)` of 2-D Tensors.

        Returns:
          A tuple `(m, new_state)`:

          - `m`: the output after reading `inputs`; it is passed through the
            projection kernel when `num_proj` is set.
          - `new_state`: `LSTMStateTuple(c, m)` when `state_is_tuple` is
            True, otherwise `c` and `m` concatenated along axis 1.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
        """
        inputs = self._tflite_wrapper.add_input(inputs,
                                                tag="input",
                                                name="input",
                                                aggregate="stack",
                                                index_override=0)

        # The inputs must share the dtype of the input-to-input weights.
        assert inputs.dtype == self.input_to_input_w.dtype

        sigmoid = math_ops.sigmoid

        if self._state_is_tuple:
            c_prev, m_prev = state
        else:
            if self._num_proj is None:
                output_width = self._num_units
            else:
                output_width = self._num_proj
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, output_width])

        # Note: For TfLite, cell_state is at index 19 while activation state
        # at index 18.
        c_prev = self._tflite_wrapper.add_input(c_prev,
                                                tag="c_prev",
                                                name="c_prev",
                                                aggregate="first",
                                                index_override=19)
        m_prev = self._tflite_wrapper.add_input(m_prev,
                                                tag="m_prev",
                                                name="m_prev",
                                                aggregate="first",
                                                index_override=18)

        if inputs.get_shape().with_rank(2)[1].value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        inputs_and_m_prev = array_ops.concat([inputs, m_prev], axis=1)

        # Each gate joins its input weights and its recurrent weights along
        # axis 1 and applies them (transposed) to [inputs, m_prev]:
        #   i = input gate, f = forget gate, o = output gate,
        #   j = new cell input.
        input_gate_w = tf.concat(
            [self.input_to_input_w, self.cell_to_input_w], axis=1)
        i = nn_ops.bias_add(
            tf.matmul(inputs_and_m_prev, input_gate_w, transpose_b=True),
            self.input_bias)

        forget_gate_w = tf.concat(
            [self.input_to_forget_w, self.cell_to_forget_w], axis=1)
        f = nn_ops.bias_add(
            tf.matmul(inputs_and_m_prev, forget_gate_w, transpose_b=True),
            self.forget_bias)

        output_gate_w = tf.concat(
            [self.input_to_output_w, self.cell_to_output_w], axis=1)
        o = nn_ops.bias_add(
            tf.matmul(inputs_and_m_prev, output_gate_w, transpose_b=True),
            self.output_bias)

        cell_input_w = tf.concat(
            [self.input_to_cell_w, self.cell_to_cell_w], axis=1)
        j = nn_ops.bias_add(
            tf.matmul(inputs_and_m_prev, cell_input_w, transpose_b=True),
            self.cell_bias)

        # New cell state c: the retained part of c_prev plus the gated new
        # input.  With peepholes, c_prev also feeds both gates through
        # diagonal weights.
        if self._use_peepholes:
            retained = sigmoid(f + self._forget_bias +
                               self._w_f_diag * c_prev) * c_prev
            written = (sigmoid(i + self._w_i_diag * c_prev) *
                       self._activation(j))
        else:
            retained = sigmoid(f + self._forget_bias) * c_prev
            written = sigmoid(i) * self._activation(j)
        c = retained + written

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type

        # Output m: the output gate applied to the activated cell state.
        if self._use_peepholes:
            output_gate = sigmoid(o + self._w_o_diag * c)
        else:
            output_gate = sigmoid(o)
        m = output_gate * self._activation(c)

        if self._num_proj is not None:
            # The projection kernel is transposed before it is applied.
            transposed_proj_kernel = tf.transpose(self._proj_kernel)
            m = math_ops.matmul(m, transposed_proj_kernel)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        c = self._tflite_wrapper.add_output(c,
                                            tag="c",
                                            name="c",
                                            aggregate="last",
                                            index_override=1)
        m = self._tflite_wrapper.add_output(m,
                                            tag="m",
                                            name="m",
                                            index_override=2,
                                            aggregate="stack")

        if self._state_is_tuple:
            new_state = tf.nn.rnn_cell.LSTMStateTuple(c, m)
        else:
            new_state = array_ops.concat([c, m], 1)
        return m, new_state