Ejemplo n.º 1
0
def rot90(image, k=1, name=None):
  """Rotate an image counter-clockwise by 90 degrees.

  Args:
    image: A 3-D tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90 degrees.
    name: A name for this operation (optional).

  Returns:
    A rotated 3-D tensor of the same type and shape as `image`.
  """
  with ops.name_scope(name, 'rot90', [image, k]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    _Check3DImage(image, require_static=False)
    k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
    k.get_shape().assert_has_rank(0)
    k = math_ops.mod(k, 4)

    def _rot90():
      return array_ops.transpose(array_ops.reverse_v2(image, [1]),
                                 [1, 0, 2])
    def _rot180():
      return array_ops.reverse_v2(image, [0, 1])
    def _rot270():
      return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]),
                                  [1])
    cases = [(math_ops.equal(k, 1), _rot90),
             (math_ops.equal(k, 2), _rot180),
             (math_ops.equal(k, 3), _rot270)]

    ret = control_flow_ops.case(cases, default=lambda: image, exclusive=True,
                                name=scope)
    ret.set_shape([None, None, image.get_shape()[2]])
    return ret
Ejemplo n.º 2
0
  def pack_uint8_r2_to_uint32(self, test_input):
    num_rows, num_columns = test_input.get_shape().as_list()
    num_output_columns = int(math.ceil(num_columns / 4.0))
    padding_input = array_ops.pad(
        math_ops.cast(test_input, dtype=dtypes.uint8),
        constant_op.constant([[
            0,
            0,
        ], [0, num_output_columns * 4 - num_columns]]))
    output = array_ops.zeros([num_rows, num_output_columns],
                             dtype=dtypes.uint32)
    num_elements_per_pack = 4
    shift_bits = 8

    iota_r1 = math_ops.range(num_output_columns * num_elements_per_pack)

    for p in range(num_elements_per_pack):
      selected_index = math_ops.equal(
          math_ops.mod(iota_r1, num_elements_per_pack), p)
      gather_index = array_ops.boolean_mask(iota_r1, selected_index)
      gathered_input = array_ops.gather(padding_input, gather_index, axis=1)
      total_shift_bits = shift_bits * (num_elements_per_pack - p - 1)
      left_shift_input = bitwise_ops.left_shift(
          math_ops.cast(gathered_input, dtype=dtypes.uint32), total_shift_bits)
      output = bitwise_ops.bitwise_or(output, left_shift_input)
    return output
Ejemplo n.º 3
0
 def insert_transformed_feature(self, columns_to_tensors):
   """Handles sparse column to id conversion."""
   sparse_id_values = math_ops.mod(columns_to_tensors[self.name].values,
                                   self.bucket_size)
   columns_to_tensors[self] = ops.SparseTensor(
       columns_to_tensors[self.name].indices, sparse_id_values,
       columns_to_tensors[self.name].shape)
Ejemplo n.º 4
0
def gcd(a, b, name=None):
  """Returns the greatest common divisor via Euclid's algorithm.

  Args:
    a: The dividend. A scalar integer `Tensor`.
    b: The divisor. A scalar integer `Tensor`.
    name: An optional name for the operation.

  Returns:
    A scalar `Tensor` representing the greatest common divisor between `a` and
    `b`.

  Raises:
    ValueError: If `a` or `b` are not scalar integers.
  """
  with ops.name_scope(name, 'gcd', [a, b]):
    a = ops.convert_to_tensor(a)
    b = ops.convert_to_tensor(b)

    a.shape.assert_has_rank(0)
    b.shape.assert_has_rank(0)

    if not a.dtype.is_integer:
      raise ValueError('a must be an integer type. Got: %s' % a.dtype)
    if not b.dtype.is_integer:
      raise ValueError('b must be an integer type. Got: %s' % b.dtype)

    cond = lambda _, b: math_ops.greater(b, array_ops.zeros_like(b))
    body = lambda a, b: [b, math_ops.mod(a, b)]
    a, b = control_flow_ops.while_loop(cond, body, [a, b], back_prop=False)
    return a
Ejemplo n.º 5
0
  def testFilteredElementsStats(self, dataset_transformation):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(101).filter(
        lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
    dataset = dataset_transformation(dataset, aggregator)
    iterator = dataset_ops.make_initializable_iterator(dataset)
    next_element = iterator.get_next()
    summary_t = aggregator.get_summary()

    with self.test_session() as sess:
      self.evaluate(iterator.initializer)
      for i in range(34):
        self.assertEqual(i * 3, self.evaluate(next_element))
        if i is not 0:
          self._assertSummaryHasScalarValue(
              self.evaluate(summary_t), "Filter::dropped_elements",
              float(i * 2))
        self._assertSummaryHasScalarValue(
            self.evaluate(summary_t), "Filter::filtered_elements", float(i + 1))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
      self._assertSummaryHasScalarValue(
          self.evaluate(summary_t), "Filter::dropped_elements", 67.0)
      self._assertSummaryHasScalarValue(
          self.evaluate(summary_t), "Filter::filtered_elements", 34.0)
  def testFilteredElementsStats(self):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(101).filter(
        lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
    dataset = self.datasetExperimentalStats(dataset, aggregator)
    next_element = self.getNext(dataset, requires_initialization=True)

    for i in range(34):
      self.assertEqual(i * 3, self.evaluate(next_element()))
      handle = self.getHandle(aggregator)
      if i != 0:
        self.assertStatisticsHasScalarValue(
            handle, self.regexForNodeName("FilterDataset", "dropped_elements"),
            float(i * 2))
      self.assertStatisticsHasScalarValue(
          handle, self.regexForNodeName("FilterDataset", "filtered_elements"),
          float(i + 1))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element())
    handle = self.getHandle(aggregator)
    self.assertStatisticsHasScalarValue(
        handle, self.regexForNodeName("FilterDataset", "dropped_elements"),
        67.0)
    self.assertStatisticsHasScalarValue(
        handle, self.regexForNodeName("FilterDataset", "filtered_elements"),
        34.0)
Ejemplo n.º 7
0
  def testFilteredElementsStats(self, dataset_transformation):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(101).filter(
        lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
    dataset = dataset_transformation(dataset, aggregator)
    next_element = self.getNext(dataset, requires_initialization=True)

    for i in range(34):
      self.assertEqual(i * 3, self.evaluate(next_element()))
      summary_str = self.evaluate(aggregator.get_summary())
      if i is not 0:
        self._assertSummaryHasScalarValue(
            summary_str,
            self.regexForNodeName("FilterDataset", "dropped_elements"),
            float(i * 2))
      self._assertSummaryHasScalarValue(
          summary_str,
          self.regexForNodeName("FilterDataset", "filtered_elements"),
          float(i + 1))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element())
    summary_str = self.evaluate(aggregator.get_summary())
    self._assertSummaryHasScalarValue(
        summary_str, self.regexForNodeName("FilterDataset", "dropped_elements"),
        67.0)
    self._assertSummaryHasScalarValue(
        summary_str, self.regexForNodeName("FilterDataset",
                                           "filtered_elements"), 34.0)
Ejemplo n.º 8
0
def _TileGrad(op, grad):
  """Sum reduces grad along the tiled dimensions."""
  input_shape = array_ops.shape(op.inputs[0])
  # We interleave multiples and input_shape to get split_shape,
  # reshape grad to split_shape, and reduce along all even
  # dimensions (the tiled dimensions) to get the result
  # with shape input_shape.  For example
  #   input_shape = [20, 30, 40]
  #   multiples = [2, 3, 4]
  #   split_shape = [2, 20, 3, 30, 4, 40]
  #   axes = [0, 2, 4]
  split_shape = array_ops.reshape(
      array_ops.transpose(array_ops.stack([op.inputs[1], input_shape])), [-1])
  axes = math_ops.range(0, array_ops.size(split_shape), 2)
  # Sum reduces grad along the first dimension for IndexedSlices
  if isinstance(grad, ops.IndexedSlices):
    grad = math_ops.unsorted_segment_sum(
        grad.values,
        math_ops.mod(grad.indices, input_shape[0]),
        input_shape[0])
    split_shape = array_ops.concat([[1], split_shape[1:]], axis=0)
  input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes)
  # Fix shape inference
  if not context.executing_eagerly():
    input_grad.set_shape(op.inputs[0].get_shape())
  return [input_grad, None]
 def _shard_indices(self, keys):
   key_shape = keys.get_shape()
   if key_shape.ndims > 1:
     # If keys are a matrix (i.e. a single key is a vector), we use the first
     # element of each key vector to determine the shard.
     keys = array_ops.slice(keys, [0, 0], [key_shape[0].value, 1])
     keys = array_ops.reshape(keys, [-1])
   indices = math_ops.mod(math_ops.abs(keys), self._num_shards)
   return math_ops.cast(indices, dtypes.int32)
  def testFilterRange(self):
    dataset = dataset_ops.Dataset.range(100).filter(
        lambda x: math_ops.not_equal(math_ops.mod(x, 3), 2))
    iterator = dataset.make_one_shot_iterator()
    get_next = iterator.get_next()

    with self.test_session() as sess:
      self.assertEqual(0, sess.run(get_next))
      self.assertEqual(1, sess.run(get_next))
      self.assertEqual(3, sess.run(get_next))
Ejemplo n.º 11
0
 def testFixed(self):
   x = [5, 10, 23]
   for dtype in [np.int32, np.int64]:
     # Test scalar and vector versions.
     for denom in [x[0], x]:
       x_np = np.array(x, dtype=dtype)
       with self.test_session(use_gpu=True):
         x_tf = constant_op.constant(x_np, shape=x_np.shape)
         y_tf = math_ops.mod(x_tf, denom)
         y_tf_np = y_tf.eval()
         y_np = np.mod(x_np, denom)
       self.assertAllClose(y_tf_np, y_np)
Ejemplo n.º 12
0
 def testFloat(self):
   x = [0.5, 0.7, 0.3]
   for dtype in [np.float32, np.double]:
     # Test scalar and vector versions.
     for denom in [x[0], [x[0]] * 3]:
       x_np = np.array(x, dtype=dtype)
       with self.test_session(use_gpu=True):
         x_tf = constant_op.constant(x_np, shape=x_np.shape)
         y_tf = math_ops.mod(x_tf, denom)
         y_tf_np = y_tf.eval()
         y_np = np.fmod(x_np, denom)
       self.assertAllClose(y_tf_np, y_np, atol=1e-2)
Ejemplo n.º 13
0
    def _add_sinusoids_signal(x, time, min_timescale=1.0, max_timescale=1.0e4):
        """Adds a bunch of sinusoids of different frequencies to a Tensor.

        Each channel of the input Tensor is incremented by a sinusoid of a different
        frequency and phase.

        This allows attention to learn to use absolute and relative positions.
        Timing signals should be added to some precursors of both the query and the
        memory inputs to attention.

        The use of relative position is possible because sin(x+y) and cos(x+y) can be
        experessed in terms of y, sin(x) and cos(x).

        In particular, we use a geometric sequence of timescales starting with
        min_timescale and ending with max_timescale.  The number of different
        timescales is equal to channels / 2. For each timescale, we
        generate the two sinusoidal signals sin(timestep/timescale) and
        cos(timestep/timescale).  All of these sinusoids are concatenated in
        the channels dimension.

        Args:
          x: a Tensor with shape [batch, length, channels]
          min_timescale: a float
          max_timescale: a float

        Returns:
          a Tensor the same shape as x.
        """
        channels = x.get_shape().as_list()[-1]
        if x.get_shape().ndims == 3:  # [batch_size, timesteps, dim]
            length = array_ops.shape(x)[1]
            position = math_ops.to_float(math_ops.range(length))
        elif x.get_shape().ndims == 2:  # [batch_size, dim]
            length = 1
            position = math_ops.to_float(math_ops.range(time, time + 1))
        else:
            raise ValueError("need a Tensor with rank 2 or 3")
        num_timescales = channels // 2
        log_timescale_increment = (
            math.log(float(max_timescale) / float(min_timescale)) /
            (math_ops.to_float(num_timescales) - 1))
        inv_timescales = min_timescale * math_ops.exp(
            math_ops.to_float(math_ops.range(num_timescales)) * -log_timescale_increment)
        scaled_time = array_ops.expand_dims(position, 1) * array_ops.expand_dims(inv_timescales, 0)
        signal = array_ops.concat([math_ops.sin(scaled_time), math_ops.cos(scaled_time)], axis=1)
        signal = array_ops.pad(signal, [[0, 0], [0, math_ops.mod(channels, 2)]])
        if x.get_shape().ndims == 3:
            signal = array_ops.reshape(signal, [1, length, channels])
        else:
            signal = array_ops.reshape(signal, [1, channels])
        return x + signal
Ejemplo n.º 14
0
def adjust_hue(image, delta, name=None):
  """Adjust hue of an RGB image.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, add an offset to the hue channel, converts
  back to RGB and then back to the original data type. If several adjustments
  are chained it is advisable to minimize the number of redundant conversions.

  `image` is an RGB image.  The image hue is adjusted by converting the
  image to HSV and rotating the hue channel (H) by
  `delta`.  The image is then converted back to RGB.

  `delta` must be in the interval `[-1, 1]`.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    delta: float.  How much to add to the hue channel.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_hue', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Remember original dtype to so we can convert back if needed
    orig_dtype = image.dtype
    flt_image = convert_image_dtype(image, dtypes.float32)

    # TODO(zhengxq): we will switch to the fused version after we add a GPU
    # kernel for that.
    fused = os.environ.get('TF_ADJUST_HUE_FUSED', '')
    fused = fused.lower() in ('true', 't', '1')

    if not fused:
      hsv = gen_image_ops.rgb_to_hsv(flt_image)

      hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
      saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
      value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])

      # Note that we add 2*pi to guarantee that the resulting hue is a positive
      # floating point number since delta is [-0.5, 0.5].
      hue = math_ops.mod(hue + (delta + 1.), 1.)

      hsv_altered = array_ops.concat_v2([hue, saturation, value], 2)
      rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)
    else:
      rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)

    return convert_image_dtype(rgb_altered, orig_dtype)
Ejemplo n.º 15
0
 def do_test(count, modulus):
   dataset = dataset_ops.Dataset.from_tensor_slices(components).map(
       _map_fn).repeat(count).filter(
           lambda x, _y, _z: math_ops.equal(math_ops.mod(x, modulus), 0))
   self.assertEqual([c.shape[1:] for c in components],
                    [shape for shape in dataset.output_shapes])
   get_next = self.getNext(dataset)
   for _ in range(count):
     for i in [x for x in range(7) if x**2 % modulus == 0]:
       result = self.evaluate(get_next())
       for component, result_component in zip(components, result):
         self.assertAllEqual(component[i]**2, result_component)
   with self.assertRaises(errors.OutOfRangeError):
     self.evaluate(get_next())
Ejemplo n.º 16
0
 def restart_decay_fn(global_step):
   if global_step is None:
     raise ValueError("global_step is required for cosine_decay.")
   global_step = math_ops.minimum(global_step, decay_steps)
   num = math_ops.mod(num_periods * math_ops.to_float(global_step),
                      decay_steps)
   fraction = num / math_ops.to_float(decay_steps)
   decayed = 0.5 * (
       1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
   if zero_after is not None:
     tmp = math_ops.to_float(
         num_periods * global_step) / math_ops.to_float(decay_steps)
     decayed = array_ops.where(
         math_ops.greater_equal(tmp, zero_after), 0.0, decayed)
   return decayed
Ejemplo n.º 17
0
def _raised_cosine_window(name, default_name, window_length, periodic,
                          dtype, a, b):
  """Helper function for computing a raised cosine window.

  Args:
    name: Name to use for the scope.
    default_name: Default name to use for the scope.
    window_length: A scalar `Tensor` or integer indicating the window length.
    periodic: A bool `Tensor` indicating whether to generate a periodic or
      symmetric window.
    dtype: A floating point `DType`.
    a: The alpha parameter to the raised cosine window.
    b: The beta parameter to the raised cosine window.

  Returns:
    A `Tensor` of shape `[window_length]` of type `dtype`.

  Raises:
    ValueError: If `dtype` is not a floating point type or `window_length` is
      not scalar or `periodic` is not scalar.
  """
  if not dtype.is_floating:
    raise ValueError('dtype must be a floating point type. Found %s' % dtype)

  with ops.name_scope(name, default_name, [window_length, periodic]):
    window_length = ops.convert_to_tensor(window_length, dtype=dtypes.int32,
                                          name='window_length')
    window_length.shape.assert_has_rank(0)
    window_length_const = tensor_util.constant_value(window_length)
    if window_length_const == 1:
      return array_ops.ones([1], dtype=dtype)
    periodic = math_ops.cast(
        ops.convert_to_tensor(periodic, dtype=dtypes.bool, name='periodic'),
        dtypes.int32)
    periodic.shape.assert_has_rank(0)
    even = 1 - math_ops.mod(window_length, 2)

    n = math_ops.cast(window_length + periodic * even - 1, dtype=dtype)
    count = math_ops.cast(math_ops.range(window_length), dtype)
    cos_arg = constant_op.constant(2 * np.pi, dtype=dtype) * count / n

    if window_length_const is not None:
      return math_ops.cast(a - b * math_ops.cos(cos_arg), dtype=dtype)
    return control_flow_ops.cond(
        math_ops.equal(window_length, 1),
        lambda: array_ops.ones([1], dtype=dtype),
        lambda: math_ops.cast(a - b * math_ops.cos(cos_arg), dtype=dtype))
Ejemplo n.º 18
0
  def _update_damping(self, prev_batch, global_step):
    """Adapts damping parameter. Check KFAC (Section 6.5) for the details.

    The damping parameter is updated according to the Levenberg-Marquardt rule
    every `self._damping_adaptation_interval` iterations.

    Args:
      prev_batch: Tensor or tuple of tensors which can be passed to
        `self._loss_fn` to evaluate loss.
      global_step: `Variable` which keeps track of number of times the training
        variables have been updated.
    Returns:
      A `tf.cond` op which updates the damping parameter.
    """
    def compute_damping():
      """"Adapts damping parameter based on "reduction ratio".

      Reduction ratio captures how closely the quadratic approximation to the
      loss function approximates the actual loss within a trust region. The
      damping update tries to make the damping as small as possible while
      maintaining the property that the quadratic model remains a good local
      approximation to the loss function.

      Returns:
        An Op to assign newly computed damping value to `self._damping`.
      """
      prev_batch_loss = self._loss_fn(prev_batch)
      with ops.control_dependencies([prev_batch_loss]):
        rho_assign = self._rho.assign(
            (prev_batch_loss - self._prev_loss) / self._q_model_change)
        with ops.control_dependencies([rho_assign]):
          new_damping = control_flow_ops.case(
              [(self._rho < 0.25, lambda: self.damping / self._omega),
               (self._rho > 0.75, lambda: self.damping * self._omega)],
              lambda: self.damping)
          with ops.control_dependencies([new_damping]):
            new_damping_min = math_ops.maximum(new_damping, self._min_damping)
            return control_flow_ops.group(self._damping.assign(new_damping_min))

    return control_flow_ops.cond(
        math_ops.equal(
            math_ops.mod(global_step + 1, self._damping_adaptation_interval),
            0), compute_damping, control_flow_ops.no_op)
Ejemplo n.º 19
0
 def do_test(count, modulus):  # pylint: disable=missing-docstring
   dataset = dataset_ops.Dataset.from_tensor_slices(components).map(
       _map_fn).repeat(count)
   # pylint: disable=g-long-lambda
   dataset = self.apply_filter(
       dataset, lambda x, _y, _z: math_ops.equal(
           math_ops.mod(x, modulus), 0))
   # pylint: enable=g-long-lambda
   self.assertEqual(
       [c.shape[1:] for c in components],
       [shape for shape in dataset_ops.get_legacy_output_shapes(dataset)])
   get_next = self.getNext(dataset)
   for _ in range(count):
     for i in [x for x in range(7) if x**2 % modulus == 0]:
       result = self.evaluate(get_next())
       for component, result_component in zip(components, result):
         self.assertAllEqual(component[i]**2, result_component)
   with self.assertRaises(errors.OutOfRangeError):
     self.evaluate(get_next())
  def testFilterDataset(self):
    components = (
        np.arange(7, dtype=np.int64),
        np.array([[1, 2, 3]], dtype=np.int64) * np.arange(
            7, dtype=np.int64)[:, np.newaxis],
        np.array(37.0, dtype=np.float64) * np.arange(7)
    )
    count = array_ops.placeholder(dtypes.int64, shape=[])
    modulus = array_ops.placeholder(dtypes.int64)

    def _map_fn(x, y, z):
      return math_ops.square(x), math_ops.square(y), math_ops.square(z)

    iterator = (
        dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn)
        .repeat(count)
        .filter(lambda x, _y, _z: math_ops.equal(math_ops.mod(x, modulus), 0))
        .make_initializable_iterator())
    init_op = iterator.initializer
    get_next = iterator.get_next()

    self.assertEqual([c.shape[1:] for c in components],
                     [t.shape for t in get_next])

    with self.test_session() as sess:
      # Test that we can dynamically feed a different modulus value for each
      # iterator.
      def do_test(count_val, modulus_val):
        sess.run(init_op, feed_dict={count: count_val, modulus: modulus_val})
        for _ in range(count_val):
          for i in [x for x in range(7) if x**2 % modulus_val == 0]:
            result = sess.run(get_next)
            for component, result_component in zip(components, result):
              self.assertAllEqual(component[i]**2, result_component)
        with self.assertRaises(errors.OutOfRangeError):
          sess.run(get_next)

      do_test(14, 2)
      do_test(4, 18)

      # Test an empty dataset.
      do_test(0, 1)
Ejemplo n.º 21
0
def adjust_hue(image, delta, name=None):
    with ops.op_scope([image], name, 'adjust_hue') as name:
        # Remember original dtype to so we can convert back if needed
        orig_dtype = image.dtype
        flt_image = tf.image.convert_image_dtype(image, tf.float32)

        hsv = gen_image_ops.rgb_to_hsv(flt_image)

        hue = tf.slice(hsv, [0, 0, 0, 0], [-1, -1, -1, 1])
        saturation = tf.slice(hsv, [0, 0, 0, 1], [-1, -1, -1, 1])
        value = tf.slice(hsv, [0, 0, 0, 2], [-1, -1, -1, 1])

        # Note that we add 2*pi to guarantee that the resulting hue is a positive
        # floating point number since delta is [-0.5, 0.5].
        hue = math_ops.mod(hue + (delta + 1.), 1.)

        hsv_altered = tf.concat(3, [hue, saturation, value])
        rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

        return tf.image.convert_image_dtype(rgb_altered, orig_dtype)
Ejemplo n.º 22
0
def adjust_hue(image, delta, name=None):
  """Adjust hue of an RGB image.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, add an offset to the hue channel, converts
  back to RGB and then back to the original data type. If several adjustments
  are chained it is advisable to minimize the number of redundant conversions.

  `image` is an RGB image.  The image hue is adjusted by converting the
  image to HSV and rotating the hue channel (H) by
  `delta`.  The image is then converted back to RGB.

  `delta` must be in the interval `[-1, 1]`.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    delta: float.  How much to add to the hue channel.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.op_scope([image], name, 'adjust_hue') as name:
    # Remember original dtype to so we can convert back if needed
    orig_dtype = image.dtype
    flt_image = convert_image_dtype(image, dtypes.float32)

    hsv = gen_image_ops.rgb_to_hsv(flt_image)

    hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
    saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
    value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])

    # Note that we add 2*pi to guarantee that the resulting hue is a positive
    # floating point number since delta is [-0.5, 0.5].
    hue = math_ops.mod(hue + (delta + 1.), 1.)

    hsv_altered = array_ops.concat(2, [hue, saturation, value])
    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    return convert_image_dtype(rgb_altered, orig_dtype)
Ejemplo n.º 23
0
  def _Grad(op, grad):
    """A gradient function for IRFFT with the provided `rank` and `rfft_fn`."""
    # Generate a simple mask like [1.0, 2.0, ..., 2.0, 1.0] for even-length FFTs
    # and [1.0, 2.0, ..., 2.0] for odd-length FFTs. To reduce extra ops in the
    # graph we special-case the situation where the FFT length and last
    # dimension of the input are known at graph construction time.
    fft_length = op.inputs[1]
    is_odd = math_ops.mod(fft_length[-1], 2)
    input_last_dimension = array_ops.shape(op.inputs[0])[-1]
    mask = array_ops.concat(
        [[1.0], 2.0 * array_ops.ones([input_last_dimension - 2 + is_odd]),
         array_ops.ones([1 - is_odd])], 0)

    rsize = math_ops.reciprocal(math_ops.to_float(_FFTSizeForGrad(grad, rank)))

    # The gradient of IRFFT is the RFFT of the incoming gradient times a scaling
    # factor and a mask. The mask scales the gradient for the Hermitian
    # symmetric components of the RFFT by a factor of two, since these
    # components are de-duplicated in the RFFT.
    rfft = rfft_fn(grad, fft_length)
    return rfft * math_ops.cast(rsize * mask, dtypes.complex64), None
Ejemplo n.º 24
0
def gcd(a, b, name=None):
  """Returns the greatest common divisor via Euclid's algorithm.

  Args:
    a: The dividend. A scalar integer `Tensor`.
    b: The divisor. A scalar integer `Tensor`.
    name: An optional name for the operation.

  Returns:
    A scalar `Tensor` representing the greatest common divisor between `a` and
    `b`.

  Raises:
    ValueError: If `a` or `b` are not scalar integers.
  """
  with ops.name_scope(name, 'gcd', [a, b]):
    a = ops.convert_to_tensor(a)
    b = ops.convert_to_tensor(b)

    a.shape.assert_has_rank(0)
    b.shape.assert_has_rank(0)

    if not a.dtype.is_integer:
      raise ValueError('a must be an integer type. Got: %s' % a.dtype)
    if not b.dtype.is_integer:
      raise ValueError('b must be an integer type. Got: %s' % b.dtype)

    # TPU requires static shape inference. GCD is used for subframe size
    # computation, so we should prefer static computation where possible.
    const_a = tensor_util.constant_value(a)
    const_b = tensor_util.constant_value(b)
    if const_a is not None and const_b is not None:
      return ops.convert_to_tensor(fractions.gcd(const_a, const_b))

    cond = lambda _, b: math_ops.greater(b, array_ops.zeros_like(b))
    body = lambda a, b: [b, math_ops.mod(a, b)]
    a, b = control_flow_ops.while_loop(cond, body, [a, b], back_prop=False)
    return a
  def testMultiplePrefetchStats(self):

    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(10).prefetch(
        2).filter(lambda x: math_ops.equal(math_ops.mod(x, 2), 0)).prefetch(1)

    dataset = self.datasetExperimentalStats(dataset, aggregator)
    next_element = self.getNext(dataset, requires_initialization=True)

    for i in range(5):
      self.assertEqual(i * 2, self.evaluate(next_element()))
      handle = self.getHandle(aggregator)
      # TODO(shivaniagarwal): using exact name of prefetch node than the regex,
      # to differentiate between two prefetch. This might break in future, at
      # which point, it would be best to disable this test.
      self.assertStatisticsHasScalarValue(
          handle, "PrefetchDataset/_5::buffer_capacity", 2)
      self.assertStatisticsContains(handle, "PrefetchDataset/_5::buffer_size")
      self.assertStatisticsHasScalarValue(
          handle, "PrefetchDataset/_8::buffer_capacity", 1)
      self.assertStatisticsContains(handle, "PrefetchDataset/_8::buffer_size")
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element())
Ejemplo n.º 26
0
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to global variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
        apply_updates = self._opt.apply_gradients(grads_and_vars)
        with ops.control_dependencies([apply_updates]):
            local_update = state_ops.assign_add(self._local_step,
                                                1,
                                                name='local_step_update').op

        # update global variables.
        def _Update_global_variables():
            local_vars = [v for g, v in grads_and_vars if g is not None]
            global_center_vars = [self._global_map[var] for var in local_vars]
            local_center_vars = [self._local_map[var] for var in local_vars]
            local_center_vars_update = []
            for lvar, var in zip(local_center_vars, global_center_vars):
                local_center_vars_update.append(lvar.assign(var))
            update_ops = []
            differences = []
            with ops.control_dependencies(local_center_vars_update):
                for v, lv in zip(local_vars, local_center_vars):
                    with ops.device(v.device):
                        differences.append(math_ops.subtract(v, lv))
                for lvar, diff in zip(local_vars, differences):
                    with ops.device(lvar.device):
                        update_ops.append(
                            state_ops.assign_sub(
                                lvar,
                                math_ops.multiply(self._moving_rate, diff)))
                for var, diff in zip(global_center_vars, differences):
                    with ops.device(var.device):
                        update_ops.append(
                            state_ops.assign_add(
                                var, math_ops.multiply(self._moving_rate,
                                                       diff)))
                if global_step:
                    with ops.colocate_with(global_step):
                        update_ops.append(state_ops.assign_add(global_step, 1))
            variable_update = control_flow_ops.group(*(update_ops))
            return variable_update

        with ops.control_dependencies([local_update]):
            condition = math_ops.equal(
                math_ops.mod(self._local_step, self._period), 0)
            conditional_update = control_flow_ops.cond(
                condition, _Update_global_variables, control_flow_ops.no_op)
        return conditional_update
Ejemplo n.º 27
0
def do_training(train_op, init_fn=None, summary_op=None, lr=None):
    global savers
    graph = ops.get_default_graph()
    with graph.as_default():
        global_step = variables.get_or_create_global_step()
        saver = tf_saver.Saver(max_to_keep=0)

        with ops.name_scope('init_ops'):
            init_op = tf_variables.global_variables_initializer()

            ready_op = tf_variables.report_uninitialized_variables()

            local_init_op = control_flow_ops.group(
                tf_variables.local_variables_initializer(),
                data_flow_ops.tables_initializer())

        summary_writer = supervisor.Supervisor.USE_DEFAULT
        with ops.name_scope('train_step'):
            train_step_kwargs = {}

            if not FLAGS.max_number_of_steps is None:
                should_stop_op = math_ops.greater_equal(
                    global_step, FLAGS.max_number_of_steps)
            else:
                should_stop_op = constant_op.constant(False)
            train_step_kwargs['should_stop'] = should_stop_op
            if FLAGS.log_every_n_steps > 0:
                train_step_kwargs['should_log'] = math_ops.equal(
                    math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)
        prefix = "loc/net"
        lp = len(prefix)
        vdic = {
            "InceptionV2" + v.op.name[lp:]: v
            for v in tf.trainable_variables()
            if v.name.startswith(prefix) and v.name.find("Logits/") < 0
        }
        _saver = tf_saver.Saver(vdic)
        savers.append(_saver)
        for i in xrange(NUM_STN):
            prefix = "stn%d/net" % i
            lp = len(prefix)
            vdic = {
                "InceptionV2" + v.op.name[lp:]: v
                for v in tf.trainable_variables()
                if v.name.startswith(prefix) and v.name.find("Logits/") < 0
            }
            # saver = tf.train.Saver(vdic)
            _saver = tf_saver.Saver(vdic)
            savers.append(_saver)
    prt("savers %d" % len(savers))

    is_chief = True
    logdir = FLAGS.train_dir

    sv = supervisor.Supervisor(graph=graph,
                               is_chief=is_chief,
                               logdir=logdir,
                               init_op=init_op,
                               init_feed_dict=None,
                               local_init_op=local_init_op,
                               ready_for_local_init_op=None,
                               ready_op=ready_op,
                               summary_op=summary_op,
                               summary_writer=summary_writer,
                               global_step=global_step,
                               saver=saver,
                               save_summaries_secs=FLAGS.save_summaries_secs,
                               save_model_secs=FLAGS.save_interval_secs,
                               init_fn=init_fn)

    if summary_writer is not None:
        train_step_kwargs['summary_writer'] = sv.summary_writer

    with sv.managed_session('', start_standard_services=False,
                            config=None) as sess:
        logging.info('Starting Session.')
        if is_chief:
            if logdir:
                sv.start_standard_services(sess)
        elif startup_delay_steps > 0:
            _wait_for_step(
                sess, global_step,
                min(startup_delay_steps, number_of_steps or sys.maxint))
        sv.start_queue_runners(sess)
        logging.info('Starting Queues.')
        try:
            while not sv.should_stop():
                total_loss, global_step_value, should_stop = train_step(
                    sess, train_op, global_step, lr, train_step_kwargs)
                current_epoch = int(
                    math.ceil(float(global_step_value) / FLAGS.steps_in_epoch))
                if global_step_value > 0 and global_step_value % FLAGS.save_every_n_steps == 0:
                    sv.saver.save(sess,
                                  sv.save_path,
                                  global_step=sv.global_step)

                if should_stop:
                    logging.info('Stopping Training.')
                    break
        except errors.OutOfRangeError:
            # OutOfRangeError is thrown when epoch limit per
            # tf.train.limit_epochs is reached.
            logging.info('Caught OutOfRangeError. Stopping Training.')
        if logdir and sv.is_chief:
            logging.info('Finished training! Saving model to disk.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
Ejemplo n.º 28
0
def train(train_op,
          logdir,
          loss,
          logits,
          batch,
          endpoint,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None,
          trace_every_n_steps=None):
    """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronous.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to. If None, model
      checkpoints and summaries will not be written.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must have take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By
      default, two `Boolean`, scalar ops called "should_stop" and "should_log"
      are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step and logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The BNS name of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the primary
      replica during replica training.
    global_step: The `Tensor` representing the global step. If left as `None`,
      then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during training.
      If the value is left as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling `tf.initialize_all_variables()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its default
      value, then the session is initialized by calling
      `tf.initialize_local_variables()` and `tf.initialize_all_tables()`.
    init_fn: An optional callable to be executed after `init_op` is called. The
      callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to its
      default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    summary_writer: `SummaryWriter` to use.  Can be `None`
      to indicate that no summaries should be written. If unset, we
      create a SummaryWriter.
    startup_delay_steps: The number of steps to wait for before beginning. Note
      that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
      argument is supplied, gradient updates will be synchronous. If left as
      `None`, gradient updates will be asynchronous.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    trace_every_n_steps: produce and save a `Timeline` in Chrome trace format
      and add it to the summaries every `trace_every_n_steps`. If None, no trace
      information will be produced or saved.

  Returns:
    the value of the loss function after training.

  Raises:
    ValueError: if `train_op` is empty or if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, if `number_of_steps` is
      negative, or if `trace_every_n_steps` is not `None` and no `logdir` is
      provided.
  """
    if train_op is None:
        raise ValueError('train_op cannot be None.')

    if logdir is None:
        if summary_op != _USE_DEFAULT:
            raise ValueError('Cannot provide summary_op because logdir=None')
        if saver is not None:
            raise ValueError('Cannot provide saver because logdir=None')
        if trace_every_n_steps is not None:
            raise ValueError('Cannot provide trace_every_n_steps because '
                             'logdir=None')

    if sync_optimizer and startup_delay_steps > 0:
        raise ValueError(
            'startup_delay_steps must be zero when sync_optimizer is supplied.'
        )

    if number_of_steps is not None and number_of_steps <= 0:
        raise ValueError(
            '`number_of_steps` must be either None or a positive number.')

    graph = graph or ops.get_default_graph()
    with graph.as_default():
        if global_step is None:
            global_step = variables.get_or_create_global_step()
        saver = saver or tf_saver.Saver()

        with ops.name_scope('init_ops'):
            if init_op == _USE_DEFAULT:
                init_op = tf_variables.initialize_all_variables()

            if ready_op == _USE_DEFAULT:
                ready_op = tf_variables.report_uninitialized_variables()

            if local_init_op == _USE_DEFAULT:
                local_init_op = control_flow_ops.group(
                    tf_variables.initialize_local_variables(),
                    data_flow_ops.initialize_all_tables())

        if summary_op == _USE_DEFAULT:
            summary_op = logging_ops.merge_all_summaries()

        if summary_writer == _USE_DEFAULT:
            summary_writer = supervisor.Supervisor.USE_DEFAULT

        cleanup_op = None

        if is_chief and sync_optimizer:
            if not isinstance(sync_optimizer,
                              sync_replicas_optimizer.SyncReplicasOptimizer):
                raise ValueError(
                    '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer'
                )

            # Need to create these BEFORE the supervisor finalizes the graph:
            with ops.control_dependencies([init_op]):
                init_tokens_op = sync_optimizer.get_init_tokens_op()
            init_op = init_tokens_op
            chief_queue_runner = sync_optimizer.get_chief_queue_runner()
            cleanup_op = sync_optimizer.get_clean_up_op()

        if train_step_kwargs == _USE_DEFAULT:
            with ops.name_scope('train_step'):
                train_step_kwargs = {}

                if number_of_steps:
                    should_stop_op = math_ops.greater_equal(
                        global_step, number_of_steps)
                else:
                    should_stop_op = constant_op.constant(False)
                train_step_kwargs['should_stop'] = should_stop_op
                train_step_kwargs['should_log'] = math_ops.equal(
                    math_ops.mod(global_step, log_every_n_steps), 0)
                if is_chief and trace_every_n_steps is not None:
                    train_step_kwargs['should_trace'] = math_ops.equal(
                        math_ops.mod(global_step, trace_every_n_steps), 0)
                    train_step_kwargs['logdir'] = logdir

    sv = supervisor.Supervisor(graph=graph,
                               is_chief=is_chief,
                               logdir=logdir,
                               init_op=init_op,
                               init_feed_dict=init_feed_dict,
                               local_init_op=local_init_op,
                               ready_op=ready_op,
                               summary_op=summary_op,
                               summary_writer=summary_writer,
                               global_step=global_step,
                               saver=saver,
                               save_summaries_secs=save_summaries_secs,
                               save_model_secs=save_interval_secs,
                               init_fn=init_fn)

    if summary_writer is not None:
        train_step_kwargs['summary_writer'] = sv.summary_writer

    should_retry = True
    while should_retry:
        try:
            should_retry = False
            with sv.managed_session(master,
                                    start_standard_services=False,
                                    config=session_config) as sess:
                logging.info('Starting Session.')
                if is_chief:
                    if logdir:
                        sv.start_standard_services(sess)
                elif startup_delay_steps > 0:
                    slim.learning._wait_for_step(
                        sess, global_step,
                        min(startup_delay_steps, number_of_steps
                            or sys.maxint))
                sv.start_queue_runners(sess)
                logging.info('Starting Queues.')
                if is_chief and sync_optimizer:
                    sv.start_queue_runners(sess, [chief_queue_runner])
                try:
                    while not sv.should_stop():
                        try:
                            total_loss, should_stop = train_step_fn(
                                sess, train_op, endpoint, batch, logits, loss,
                                global_step, number_of_steps,
                                train_step_kwargs)
                        except tf.errors.OutOfRangeError:
                            if logdir and sv.is_chief:
                                sv.saver.save(sess,
                                              sv.save_path,
                                              global_step=sv.global_step)
                            if sv.is_chief and cleanup_op is not None:
                                sess.run(cleanup_op)
                            print('Training finished over one epoch....')
                            break

                        if (global_step % 2):
                            print '%f steps finished, final step loss: %f ' % (
                                global_step, total_loss)
                        if should_stop:
                            logging.info('Stopping Training.')
                            break
                    if logdir and sv.is_chief:
                        logging.info(
                            'Finished training! Saving model to disk.')
                        sv.saver.save(sess,
                                      sv.save_path,
                                      global_step=sv.global_step)
                except tf.errors.OutOfRangeError:
                    if logdir and sv.is_chief:
                        logging.info(
                            'Finished training! Saving model to disk.')
                        sv.saver.save(sess,
                                      sv.save_path,
                                      global_step=sv.global_step)
                    if sv.is_chief and cleanup_op is not None:
                        sess.run(cleanup_op)
                    print('Training finished over one epoch....')
                except:
                    if sv.is_chief and cleanup_op is not None:
                        logging.info('About to execute sync_clean_up_op!')
                        sess.run(cleanup_op)
                    raise

        except errors.AbortedError:
            # Always re-run on AbortedError as it indicates a restart of one of the
            # distributed tensorflow servers.
            logging.info('Retrying training!')
            should_retry = True

    return total_loss
Ejemplo n.º 29
0
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to global variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the variables
        have been updated.
      name: Optional name for the returned operation.  Default to the name
        passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.
    """
        local_vars = [v for g, v in grads_and_vars if g is not None]
        grads = [g for g, v in grads_and_vars if g is not None]

        def _variable_creator(next_creator, collections, **kwargs):
            if not collections:
                collections = [ops.GraphKeys.LOCAL_VARIABLES]
            elif ops.GraphKeys.GLOBAL_VARIABLES in collections:
                collections = list(collections)
                collections.append(ops.GraphKeys.LOCAL_VARIABLES)
                collections.remove(ops.GraphKeys.GLOBAL_VARIABLES)
            return next_creator(collections=collections, **kwargs)

        # theta = theta - lr * grad
        with variable_scope.variable_creator_scope(_variable_creator):
            local_update_op = self._opt.apply_gradients(grads_and_vars)

        # a = a + grad
        update_ops = []
        update_ops.append(local_update_op)
        grad_vars = [self._grad_map[var] for var in local_vars]
        for g, grad_var in zip(grads, grad_vars):
            update_ops.append(state_ops.assign_add(grad_var, g))

        global_center_vars = [self._global_map[var] for var in local_vars]

        # update global variables.
        def _Update_global_variables():
            global_norm = []
            # a = a / t
            for g in grad_vars:
                global_norm.append(state_ops.assign(g, g / self._period))
            # apply
            with ops.control_dependencies(global_norm):
                apply_global_op = self._opt.apply_gradients(
                    zip(grad_vars, global_center_vars))

            # pull
            with ops.control_dependencies([apply_global_op]):
                update_ops = []
                if global_step:
                    with ops.colocate_with(global_step):
                        update_ops.append(state_ops.assign_add(global_step, 1))

                for lvar in local_vars:
                    g_val = self._global_map[lvar].read_value()
                    update_ops.append(state_ops.assign(lvar, g_val))
                for grad_var in grad_vars:
                    update_ops.append(
                        state_ops.assign(grad_var,
                                         array_ops.zeros_like(grad_var)))
                variable_update = control_flow_ops.group(*(update_ops))
            return variable_update

        local_update = state_ops.assign_add(self._local_step,
                                            1,
                                            name='local_step_update').op

        with ops.control_dependencies([local_update]):
            condition = math_ops.equal(
                math_ops.mod(self._local_step, self._period), 0)
        with ops.control_dependencies(update_ops):
            conditional_update = control_flow_ops.cond(
                condition, _Update_global_variables, control_flow_ops.no_op)
        return conditional_update
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A new beam state.
  """
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  step_log_probs = nn_ops.log_softmax(logits)
  step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished)
  total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs

  # Calculate the continuation lengths by adding to all continuing beams.
  vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
  lengths_to_add = array_ops.one_hot(
      indices=array_ops.fill([batch_size, beam_width], end_token),
      depth=vocab_size,
      on_value=np.int64(0),
      off_value=np.int64(1),
      dtype=dtypes.int64)
  add_mask = math_ops.to_int64(math_ops.logical_not(previously_finished))
  lengths_to_add *= array_ops.expand_dims(add_mask, 2)
  new_prediction_lengths = (
      lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight,
      dtype=logits.dtype)

  time = ops.convert_to_tensor(time, name="time")
  # During the first time step we only consider the initial beam
  scores_shape = array_ops.shape(scores)
  scores_flat = array_ops.reshape(scores, [batch_size, -1])

  # Pick the next beams according to the specified successors function
  next_beam_size = ops.convert_to_tensor(
      beam_width, dtype=dtypes.int32, name="beam_width")
  next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size)

  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      batch_size=batch_size,
      range_size=beam_width * vocab_size,
      gather_shape=[-1],
      name="next_beam_probs")
  # Note: just doing the following
  #   math_ops.to_int32(word_indices % vocab_size,
  #       name="next_beam_word_ids")
  # would be a lot cleaner but for reasons unclear, that hides the results of
  # the op which prevents capturing it with tfdbg debug ops.
  raw_next_word_ids = math_ops.mod(
      word_indices, vocab_size, name="next_beam_word_ids")
  next_word_ids = math_ops.to_int32(raw_next_word_ids)
  next_beam_ids = math_ops.to_int32(
      word_indices / vocab_size, name="next_beam_parent_ids")

  # Append new ids to current predictions
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_finished = math_ops.logical_or(
      previously_finished,
      math_ops.equal(next_word_ids, end_token),
      name="next_beam_finished")

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged.
  # 2. Beams that are now finished (EOS predicted) have their length
  #    increased by 1.
  # 3. Beams that are not yet finished have their length increased by 1.
  lengths_to_add = math_ops.to_int64(math_ops.logical_not(previously_finished))
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_prediction_len += lengths_to_add

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  # pylint: disable=g-long-lambda
  next_cell_state = nest.map_structure(
      lambda gather_from: _maybe_tensor_gather_helper(
          gather_indices=next_beam_ids,
          gather_from=gather_from,
          batch_size=batch_size,
          range_size=beam_width,
          gather_shape=[batch_size * beam_width, -1]),
      next_cell_state)
  # pylint: enable=g-long-lambda

  next_state = BeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
Ejemplo n.º 31
0
def _train_deeplab_model(iterator, num_of_classes, ignore_label):
    """Trains the deeplab model.

  Args:
    iterator: An iterator of type tf.data.Iterator for images and labels.
    num_of_classes: Number of classes for the dataset.
    ignore_label: Ignore label for the dataset.

  Returns:
    train_tensor: A tensor to update the model variables.
    summary_op: An operation to log the summaries.
  """
    global_step = tf.train.get_or_create_global_step()

    learning_rate = train_utils.get_model_learning_rate(
        FLAGS.learning_policy, FLAGS.base_learning_rate,
        FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
        FLAGS.training_number_of_steps, FLAGS.learning_power,
        FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)

    tower_losses = []
    tower_grads = []
    for i in range(FLAGS.num_clones):
        with tf.device('/gpu:%d' % i):
            # First tower has default name scope.
            name_scope = ('clone_%d' % i) if i else ''
            with tf.name_scope(name_scope) as scope:
                loss = _tower_loss(iterator=iterator,
                                   num_of_classes=num_of_classes,
                                   ignore_label=ignore_label,
                                   scope=scope,
                                   reuse_variable=(i != 0))
                tower_losses.append(loss)

    if FLAGS.quantize_delay_step >= 0:
        if FLAGS.num_clones > 1:
            raise ValueError('Quantization doesn\'t support multi-clone yet.')
        tf.contrib.quantize.create_training_graph(
            quant_delay=FLAGS.quantize_delay_step)

    for i in range(FLAGS.num_clones):
        with tf.device('/gpu:%d' % i):
            name_scope = ('clone_%d' % i) if i else ''
            with tf.name_scope(name_scope) as scope:
                grads = optimizer.compute_gradients(tower_losses[i])
                tower_grads.append(grads)

    with tf.device('/cpu:0'):
        grads_and_vars = _average_gradients(tower_grads)

        # Modify the gradients for biases and last layer variables.
        last_layers = model.get_extra_layer_scopes(
            FLAGS.last_layers_contain_logits_only)
        grad_mult = train_utils.get_model_gradient_multipliers(
            last_layers, FLAGS.last_layer_gradient_multiplier)
        if grad_mult:
            grads_and_vars = tf.contrib.training.multiply_gradients(
                grads_and_vars, grad_mult)

        # Create gradient update op.
        grad_updates = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

        # Gather update_ops. These contain, for example,
        # the updates for the batch_norm variables created by model_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)

        total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

        # Print total loss to the terminal.
        # This implementation is mirrored from tf.slim.summaries.
        should_log = math_ops.equal(math_ops.mod(global_step, FLAGS.log_steps),
                                    0)
        total_loss = tf.cond(
            should_log,
            lambda: tf.Print(total_loss, [total_loss], 'Total loss is :'),
            lambda: total_loss)

        tf.summary.scalar('total_loss', total_loss)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Excludes summaries from towers other than the first one.
        summary_op = tf.summary.merge_all(scope='(?!clone_)')

    return train_tensor, summary_op
Ejemplo n.º 32
0
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
    """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A new beam state.
  """

    static_batch_size = tensor_util.constant_value(batch_size)

    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = nn_ops.log_softmax(logits)
    #step_log_probs",Tensor shape=(?, 10, 56136)
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    #step_log_probs_masked (?, 10, 56136)
    total_probs = array_ops.expand_dims(beam_state.log_probs,
                                        2) + step_log_probs
    #total_probs (?, 10, 56136)
    # Calculate the continuation lengths by adding to all continuing beams.
    vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
    lengths_to_add = array_ops.one_hot(
        indices=array_ops.tile(array_ops.reshape(end_token, [1, 1]),
                               [batch_size, beam_width]),
        depth=vocab_size,
        on_value=constant_op.constant(0, dtype=dtypes.int64),
        off_value=constant_op.constant(1, dtype=dtypes.int64),
        dtype=dtypes.int64)
    #lengths_to_add shape=(?, 10, 56136)
    add_mask = (1 - math_ops.to_int64(previously_finished))
    #add_mask shape=(?, 10), dtype=int64
    lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add
    #lengths_to_add shape=(?, 10, 56136)
    new_prediction_lengths = (lengths_to_add +
                              array_ops.expand_dims(prediction_lengths, 2))
    #new_prediction_lengths shape=(?, 10, 56136)
    # Calculate the scores for each beam
    scores = _get_scores(log_probs=total_probs,
                         sequence_lengths=new_prediction_lengths,
                         length_penalty_weight=length_penalty_weight)
    scores_mask = tf.constant([step_log_probs.dtype.min, 0],
                              dtype=dtypes.float32,
                              shape=[vocab_size],
                              name='mask')
    scores_masked = tf.add(scores, scores_mask)
    scores_mask2 = tf.constant([0, 0, 0, 0, 0, step_log_probs.dtype.min, 0],
                               dtype=dtypes.float32,
                               shape=[vocab_size],
                               name='mask2')
    scores_masked = tf.add(scores_mask2, scores_masked)

    def new_scores(scores_masked):
        scores_no_stop = tf.constant([0, 0, step_log_probs.dtype.min, 0],
                                     dtype=dtypes.float32,
                                     shape=[vocab_size],
                                     name='no_stop')
        scores = tf.add(scores_masked, scores_no_stop)
        return scores

    #constrain the length
    scores = control_flow_ops.cond(
        #time <9 ,
        time < 0,
        lambda: new_scores(scores_masked),
        lambda: scores_masked)

    #scores shape=(?, 10, 56136)
    #[batch_size, beam_width, vocab_size]
    time = ops.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    scores_shape = array_ops.shape(scores)
    #scores_shape" shape=(3,)
    scores_to_flat_1 = array_ops.reshape(scores, [batch_size, 2, -1])
    print("scores_to_flat_1", scores_to_flat_1)
    scores_to_0 = scores[:, 0]
    scores_to_1 = scores[:, -1]
    scores_to_flat_2 = tf.concat([scores_to_0, scores_to_1], 1)
    scores_flat = control_flow_ops.cond(
        time > 0, lambda: scores_to_flat_1,
        lambda: array_ops.reshape(scores_to_flat_2, [batch_size, 2, -1]))
    num_available_beam = control_flow_ops.cond(
        time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]),
        lambda: math_ops.reduce_prod(scores_shape[2:]))
    #scores_flat", shape=(?, ?)
    #num_available_beam" shape=()
    # Pick the next beams according to the specified successors function
    next_beam_size = math_ops.minimum(
        ops.convert_to_tensor(beam_width,
                              dtype=dtypes.int32,
                              name="beam_width"), num_available_beam)
    #scores_t = tf.reshape(scores_flat,[batch_size,2,-1])
    ############################
    #input_words=['entrencheds01', 'entrencheds02', 'forgev01', 'forgev04', \
    #             'hitn02', 'hitn03', 'vaultn02', 'vaultn04', 'deepa03', \
    #             'deeps02', 'admitv01', 'admitv02', 'plantn01', 'plantn02',\
    #             'squaren01', 'squaren05', 'drawv05', 'drawv06', 'spellv03', \
    #             'spellv02', 'shotn02', 'shotn04', 'coachv01', 'coachv02', 'casen05',\
    #             'casen09', 'focusn01', 'focusn02', 'tasten01', 'tasten04', 'footn01', \
    #             'footv01']
    input_words = get_words()
    return_list = prior_scores(input_words)
    return_array = np.array(return_list)
    return_tensor = tf.convert_to_tensor(return_array)
    tiling = [1, 5, 1]
    prior_mask = tf.tile(tf.expand_dims(return_tensor, 1), tiling)
    prior_mask = tf.cast(prior_mask, tf.float32)
    prior_mask = array_ops.reshape(prior_mask, [batch_size, -1])
    #print ("prior_mask",prior_mask)
    scores_sum = tf.reduce_sum(scores_to_flat_1, 1)

    #print ("scores_sum_1",scores_sum)
    #def cal_scores_sum(scores_sum,prior_mask):
    #    return tf.add(scores_sum,prior_mask)
    #scores_sum = control_flow_ops.cond(
    #    time > 0,
    #    lambda: cal_scores_sum(scores_sum,prior_mask),
    #    lambda: scores_sum)
    #scores_sum=tf.add(scores_sum,prior_mask)
    #print ("scores_sum_2",scores_sum)
    ############################

    #scores_final=tf.concat([scores_sum, scores_sum],1)
    def cal_scores_indices(scores_to_0, scores_to_1):
        next_beam_scores_1, word_indices_1 = nn_ops.top_k(scores_to_0, k=5)
        print("ori next_beam_scores_1,word_indices_1", next_beam_scores_1)
        print("ori word_indices_1", word_indices_1)
        next_beam_scores_2, word_indices_2 = nn_ops.top_k(scores_to_1, k=5)
        next_beam_scores = tf.concat([next_beam_scores_1, next_beam_scores_2],
                                     1)
        word_indices = tf.concat(
            [word_indices_1, word_indices_2 + 9 * vocab_size], 1)
        return next_beam_scores, word_indices

    def cal_scores_indices_t1(scores_final, next_beam_size):
        next_beam_scores_1, word_indices_1 = nn_ops.top_k(scores_final, k=5)
        #next_beam_scores_1, word_indices_1=sample(next_beam_scores_1,word_indices_1)
        print("next_beam_scores_1", next_beam_scores_1)
        print("word_indices_1", word_indices_1)
        next_beam_scores = tf.concat([next_beam_scores_1, next_beam_scores_1],
                                     1)
        word_indices = tf.concat(
            [word_indices_1, word_indices_1 + 5 * vocab_size], 1)
        return next_beam_scores, word_indices

    next_beam_scores, word_indices = control_flow_ops.cond(
        time > 0, lambda: cal_scores_indices_t1(scores_sum, next_beam_size),
        lambda: cal_scores_indices(scores_to_0, scores_to_1))

    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])
    #shape=(?, ?)
    # Pick out the probs, beam_ids, and states according to the chosen predictions

    next_beam_probs = _tensor_gather_helper(gather_indices=word_indices,
                                            gather_from=total_probs,
                                            batch_size=batch_size,
                                            range_size=beam_width * vocab_size,
                                            gather_shape=[-1],
                                            name="next_beam_probs")
    # Note: just doing the following
    #   math_ops.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results of
    # the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = math_ops.mod(word_indices,
                                     vocab_size,
                                     name="next_beam_word_ids")
    #raw_next_word_ids shape=(?, 10)
    next_word_ids = math_ops.to_int32(raw_next_word_ids)
    next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                      name="next_beam_parent_ids")

    # Append new ids to current predictions
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = math_ops.logical_or(previously_finished,
                                        math_ops.equal(next_word_ids,
                                                       end_token),
                                        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged
    # 2. Beams that are now finished (EOS predicted) remain unchanged
    # 3. Beams that are not yet finished have their length increased by 1
    lengths_to_add = math_ops.to_int64(
        math_ops.not_equal(next_word_ids, end_token))
    lengths_to_add = (1 - math_ops.to_int64(next_finished)) * lengths_to_add
    next_prediction_len = _tensor_gather_helper(gather_indices=next_beam_ids,
                                                gather_from=beam_state.lengths,
                                                batch_size=batch_size,
                                                range_size=beam_width,
                                                gather_shape=[-1])
    next_prediction_len += lengths_to_add

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    # pylint: disable=g-long-lambda
    next_cell_state = nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]), next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = BeamSearchDecoderState(cell_state=next_cell_state,
                                        log_probs=next_beam_probs,
                                        lengths=next_prediction_len,
                                        finished=next_finished)
    print('next_beam_probs', next_beam_probs)
    output = BeamSearchDecoderOutput(scores=next_beam_scores,
                                     predicted_ids=next_word_ids,
                                     parent_ids=next_beam_ids)

    return output, next_state
Ejemplo n.º 33
0
 def testFilterRange(self):
     dataset = dataset_ops.Dataset.range(4)
     dataset = self.apply_filter(
         dataset, lambda x: math_ops.not_equal(math_ops.mod(x, 3), 2))
     self.assertDatasetProduces(dataset, expected_output=[0, 1, 3])
def train_y(
        train_op,
        logdir,
        train_step_fn=train_step_y,
        train_step_kwargs=_USE_DEFAULT,
        train_step_kwargs_extra=None,  # YY: i added these to be able to pass extra args for evaluation
        log_every_n_steps=1,
        eval_ops=None,  # :YY
        num_evals=0,  # YY
        eval_ops_valid=None,  # :YY
        num_evals_valid=0,  # YY
        graph=None,
        master='',
        is_chief=True,
        global_step=None,
        number_of_steps=None,
        init_op=_USE_DEFAULT,
        init_feed_dict=None,
        local_init_op=_USE_DEFAULT,
        init_fn=None,
        ready_op=_USE_DEFAULT,
        summary_op=_USE_DEFAULT,
        save_summaries_secs=600,
        summary_writer=_USE_DEFAULT,
        startup_delay_steps=0,
        saver=None,
        save_interval_secs=600,
        # sync_optimizer=None, # :YY
        session_config=None,
        trace_every_n_steps=None):
    """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronous.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to. If None, model
      checkpoints and summaries will not be written.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must have take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By
      default, two `Boolean`, scalar ops called "should_stop" and "should_log"
      are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step and logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The address of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the primary
      replica during replica training.
    global_step: The `Tensor` representing the global step. If left as `None`,
      then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during training.
      If the value is left as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling `tf.global_variables_initializer()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its default
      value, then the session is initialized by calling
      `tf.local_variables_initializer()` and `tf.tables_initializer()`.
    init_fn: An optional callable to be executed after `init_op` is called. The
      callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to its
      default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    summary_writer: `SummaryWriter` to use.  Can be `None`
      to indicate that no summaries should be written. If unset, we
      create a SummaryWriter.
    startup_delay_steps: The number of steps to wait for before beginning. Note
      that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
      argument is supplied, gradient updates will be synchronous. If left as
      `None`, gradient updates will be asynchronous.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    trace_every_n_steps: produce and save a `Timeline` in Chrome trace format
      and add it to the summaries every `trace_every_n_steps`. If None, no trace
      information will be produced or saved.

  Returns:
    the value of the loss function after training.

  Raises:
    ValueError: if `train_op` is empty or if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, if `number_of_steps` is
      negative, or if `trace_every_n_steps` is not `None` and no `logdir` is
      provided.
  """
    if train_op is None:
        raise ValueError('train_op cannot be None.')

    if logdir is None:
        if summary_op != _USE_DEFAULT:
            raise ValueError('Cannot provide summary_op because logdir=None')
        if saver is not None:
            raise ValueError('Cannot provide saver because logdir=None')
        if trace_every_n_steps is not None:
            raise ValueError('Cannot provide trace_every_n_steps because '
                             'logdir=None')

    if number_of_steps is not None and number_of_steps <= 0:
        raise ValueError(
            '`number_of_steps` must be either None or a positive number.')

    graph = graph or ops.get_default_graph()
    with graph.as_default():
        if global_step is None:
            global_step = variables.get_or_create_global_step()
        saver = saver or tf_saver.Saver()

        with ops.name_scope('init_ops'):
            if init_op == _USE_DEFAULT:
                init_op = tf_variables.global_variables_initializer()

            if ready_op == _USE_DEFAULT:
                ready_op = tf_variables.report_uninitialized_variables()

            if local_init_op == _USE_DEFAULT:
                ## YY: I separated them,
                y_local_init_op = tf_variables.local_variables_initializer()
                ## end YY
                local_init_op = control_flow_ops.group(
                    y_local_init_op,  # YY # tf_variables.local_variables_initializer(),
                    data_flow_ops.tables_initializer())

            ready_for_local_init_op = None

        if summary_op == _USE_DEFAULT:
            summary_op = summary.merge_all()

        if summary_writer == _USE_DEFAULT:
            summary_writer = supervisor.Supervisor.USE_DEFAULT

        if train_step_kwargs == _USE_DEFAULT:
            with ops.name_scope('train_step'):
                train_step_kwargs = {}

                if number_of_steps:
                    should_stop_op = math_ops.greater_equal(
                        global_step, number_of_steps)
                else:
                    should_stop_op = constant_op.constant(False)
                train_step_kwargs['should_stop'] = should_stop_op
                train_step_kwargs['should_log'] = math_ops.equal(
                    math_ops.mod(global_step, log_every_n_steps), 0)
                if is_chief and trace_every_n_steps is not None:
                    train_step_kwargs['should_trace'] = math_ops.equal(
                        math_ops.mod(global_step, trace_every_n_steps), 0)
                    train_step_kwargs['logdir'] = logdir
                ## YY:
                if train_step_kwargs_extra is not None:
                    for name in train_step_kwargs_extra:
                        train_step_kwargs[name] = train_step_kwargs_extra[name]
                ## :YY

    sv = supervisor.Supervisor(graph=graph,
                               is_chief=is_chief,
                               logdir=logdir,
                               init_op=init_op,
                               init_feed_dict=init_feed_dict,
                               local_init_op=local_init_op,
                               ready_for_local_init_op=ready_for_local_init_op,
                               ready_op=ready_op,
                               summary_op=summary_op,
                               summary_writer=summary_writer,
                               global_step=global_step,
                               saver=saver,
                               save_summaries_secs=save_summaries_secs,
                               save_model_secs=save_interval_secs,
                               init_fn=init_fn)

    if summary_writer is not None:
        train_step_kwargs['summary_writer'] = sv.summary_writer
        train_step_kwargs[
            'local_init_op'] = y_local_init_op  # tf_variables.local_variables_initializer() #local_init_op  ## YY I added this

    should_retry = True
    while should_retry:
        try:
            should_retry = False
            with sv.managed_session(master,
                                    start_standard_services=False,
                                    config=session_config) as sess:
                logging.info('Starting Session.')
                if is_chief:
                    if logdir:
                        sv.start_standard_services(sess)
                elif startup_delay_steps > 0:
                    _wait_for_step(
                        sess, global_step,
                        min(startup_delay_steps, number_of_steps
                            or sys.maxint))
                sv.start_queue_runners(sess)
                logging.info('Starting Queues.')
                try:
                    while not sv.should_stop():
                        total_loss, should_stop = train_step_fn(
                            sess, train_op, global_step, train_step_kwargs)
                        if should_stop:
                            logging.info('Stopping Training.')
                            break
                except errors.OutOfRangeError:
                    # OutOfRangeError is thrown when epoch limit per
                    # tf.train.limit_epochs is reached.
                    logging.info('Caught OutOfRangeError. Stopping Training.')
                if logdir and sv.is_chief:
                    logging.info('Finished training! Saving model to disk.')
                    sv.saver.save(sess,
                                  sv.save_path,
                                  global_step=sv.global_step)

                    ## YY: evaluating here
                    if eval_ops is not None:
                        logging.info(
                            '********* Starting evaluation on Training set, at '
                            +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
                        evaluation_y.evaluate_loop_slim_streaming_metrics(
                            sess, num_evals, eval_ops)
                        summary_str = sess.run(summary_op)
                        sv.summary_computed(sess, summary_str)
                        logging.info(
                            '********* Finished evaluation on Training set, at '
                            +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
                    ## YY: starting evaluation on validation set if exist:
                    if eval_ops_valid is not None:
                        logging.info(
                            '********* Starting evaluation on Validation set, at '
                            +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
                        np_global_step = training_util.global_step(
                            sess, global_step)
                        evaluation_y.evaluate_loop_slim_streaming_metrics(
                            sess, num_evals_valid, eval_ops_valid)
                        summary_str = sess.run(summary_op)
                        sv.summary_writer.add_summary(summary_str,
                                                      np_global_step)
                        logging.info(
                            '********* Finished evaluation on Validation set, at '
                            +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
                    # :YY

        except errors.AbortedError:
            # Always re-run on AbortedError as it indicates a restart of one of the
            # distributed tensorflow servers.
            logging.info('Retrying training!')
            should_retry = True

    return total_loss
def sample_symbols_new(logits, log_probs, finished, lengths, time):
    """
    :param logits: [batch_size * beam_size, target_vocab_size]
    :param log_probs: [batch_size * beam_size, ]
    :param finished: [batch_size * beam_size, ]
    :param lengths: decoding length [batch_size * beam_size, ]
    :param time:
    :return:
    """

    # [batch_size * beam_size,]
    prev_finished_float = math_ops.to_float(finished)
    # [batch_size * beam_size, ]
    prev_log_probs = log_probs
    # [batch_size * beam_size, target_vocab_size]
    probs = advanced_log_softmax(logits)  # negative

    # mask the finished beam except only one entrance (target_eos_id)
    #   [target_vocab_size, ]: [float_min, float_min, float_min, ..., 0]
    #   this forces the beam with EOS continue to generate EOS
    finished_beam_bias = finished_beam_one_entry_bias(on_entry=eos_id,
                                                      num_entries=vocab_size)
    # [batch_size * beam_size, target_vocab_size]: outer product
    finished_beam_bias = expand_to_beam_size(finished_beam_bias,
                                             beam_size * batch_size,
                                             axis=0)
    finished_beam_bias *= array_ops.expand_dims(prev_finished_float, 1)
    # compute new probs, with finished flags & mask
    probs = probs * array_ops.expand_dims(1. - prev_finished_float,
                                          1) + finished_beam_bias

    # [batch_size * beam_size, target_vocab_size]
    # compute new log_probs
    log_probs = probs + array_ops.expand_dims(prev_log_probs, 1)
    # new decoding length: [batch_size * beam_size]
    lengths = lengths + 1 - math_ops.to_int32(finished)
    # compute beam score
    #  length_penalty: [batch_size * beam_size,]
    length_penalty = math_ops.pow(((5.0 + math_ops.to_float(lengths)) / 6.0),
                                  -alpha)
    scores = log_probs * array_ops.expand_dims(length_penalty, axis=1)

    # flatten
    # [batch_size, beam_size * target_vocab_size]
    scores = array_ops.reshape(array_ops.reshape(scores, [-1]),
                               [batch_size, -1])
    ret_log_probs = array_ops.reshape(array_ops.reshape(log_probs, [-1]),
                                      [batch_size, -1])

    scores_flat = control_flow_ops.cond(
        ops.convert_to_tensor(time) > 0,
        lambda: scores,  # time > 0: all
        lambda: array_ops.slice(scores, [0, 0], [-1, vocab_size])
    )  # time = 0: first logits in each batch

    # [batch_size, beam_size] will restore top live_k
    sample_scores, sample_ids = nn_ops.top_k(scores_flat, k=beam_size)
    ret_sample_ids = array_ops.reshape(sample_ids, [-1])
    # flatten: [batch_size * beam_size,]
    sample_ids = array_ops.reshape(sample_ids, [-1])
    # because we do topk to scores with dim:[batch, beam * vocab]
    #   we need to cover the true word ids
    word_ids = math_ops.mod(sample_ids, vocab_size)

    # beam ids should be adjusted according to batch_size
    #  batch_pos, [batch_size, beam_size]: [[0, 0, ...], [1, 1,...], [batch_size,...] ]
    batch_pos = compute_batch_indices(batch_size, beam_size)

    # compute new beam_ids, [batch_size * beam_size, ]
    beam_ids = math_ops.div(sample_ids, vocab_size) \
               + array_ops.reshape(batch_pos * beam_size, [-1])

    # we need to recover log_probs from score
    # flatten sample_scores: [batch_size * beam_size,]
    sample_scores_flatten = array_ops.reshape(sample_scores, [-1])
    # gather each length penalty
    length_penalty = gather_states(length_penalty, beam_ids)
    # recover log probabilities
    next_log_probs = sample_scores_flatten / length_penalty
    # gather states according to beam_ids
    next_lengths = gather_states(lengths, beam_ids)

    # [batch_size * beam_size * vocab_size, ]
    log_probs_flat = array_ops.reshape(log_probs, [-1])
    log_probs_index = array_ops.reshape(
        batch_pos, [-1]) * beam_size * vocab_size + sample_ids
    next_log_probs = array_ops.gather(log_probs_flat, log_probs_index)

    return word_ids, beam_ids, next_log_probs, next_lengths, ret_log_probs, ret_sample_ids, length_penalty
Ejemplo n.º 36
0
 def even(x):
   return math_ops.equal(math_ops.mod(x, 2), 0)
Ejemplo n.º 37
0
def train(train_op,
          logdir,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=None,
          init_fn=None,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None):
    """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronous.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must have take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By
      default, two `Boolean`, scalar ops called "should_stop" and "should_log"
      are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step and logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The BNS name of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the primary
      replica during replica training.
    global_step: The `Tensor` representing the global step. If left as `None`,
      then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during training.
      If the value is left as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling `tf.initialize_all_variables()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If None,
      then the session is initialized by calling
      `tf.initialize_local_variables()` and `tf.initialize_all_tables()`.
    init_fn: An optional callable to be executed after `init_op` is called. The
      callable must accept one argument, the session being initialized.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    startup_delay_steps: The number of steps to wait for before beginning. Note
      that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If none, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
      argument is supplied, gradient updates will be synchronous. If left as
      `None`, gradient updates will be asynchronous.

  Returns:
    the value of the loss function after training.

  Raises:
    ValueError: if `train_op` is empty or if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, or if `number_of_steps` is
      negative.
  """
    if train_op is None:
        raise ValueError('train_op cannot be None.')

    if sync_optimizer and startup_delay_steps > 0:
        raise ValueError(
            'startup_delay_steps must be zero when sync_optimizer is supplied.'
        )

    if number_of_steps is not None and number_of_steps <= 0:
        raise ValueError(
            '`number_of_steps` must be either None or a positive number.')

    graph = graph or ops.get_default_graph()
    with graph.as_default():
        if global_step is None:
            global_step = variables.get_or_create_global_step()
        saver = saver or tf_saver.Saver()

    if init_op == _USE_DEFAULT:
        init_op = tf_variables.initialize_all_variables()

    if summary_op == _USE_DEFAULT:
        summary_op = logging_ops.merge_all_summaries()

    cleanup_op = None

    if is_chief and sync_optimizer:
        if not isinstance(sync_optimizer,
                          sync_replicas_optimizer.SyncReplicasOptimizer):
            raise ValueError(
                '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer')

        # Need to create these BEFORE the supervisor finalizes the graph:
        with ops.control_dependencies([init_op]):
            init_tokens_op = sync_optimizer.get_init_tokens_op()
        init_op = init_tokens_op
        chief_queue_runner = sync_optimizer.get_chief_queue_runner()
        cleanup_op = sync_optimizer.get_clean_up_op()

    if train_step_kwargs == _USE_DEFAULT:
        train_step_kwargs = {}

        if number_of_steps:
            should_stop_op = math_ops.greater_equal(global_step,
                                                    number_of_steps)
        else:
            should_stop_op = constant_op.constant(False)
        train_step_kwargs['should_stop'] = should_stop_op
        train_step_kwargs['should_log'] = math_ops.equal(
            math_ops.mod(global_step, log_every_n_steps), 0)

    sv = supervisor.Supervisor(graph=graph,
                               is_chief=is_chief,
                               logdir=logdir,
                               init_op=init_op,
                               init_feed_dict=init_feed_dict,
                               local_init_op=local_init_op,
                               summary_op=summary_op,
                               global_step=global_step,
                               saver=saver,
                               save_summaries_secs=save_summaries_secs,
                               save_model_secs=save_interval_secs,
                               init_fn=init_fn)

    with sv.managed_session(master, start_standard_services=False) as sess:
        if is_chief:
            sv.start_standard_services(sess)
        elif not is_chief and startup_delay_steps > 0:
            _wait_for_step(
                sess, global_step,
                min(startup_delay_steps, number_of_steps or sys.maxint))
        sv.start_queue_runners(sess)
        if is_chief and sync_optimizer:
            sv.start_queue_runners(sess, [chief_queue_runner])

        try:
            while not sv.should_stop():
                total_loss, should_stop = train_step_fn(
                    sess, train_op, global_step, train_step_kwargs)
                if should_stop:
                    break
        finally:
            if sv.is_chief and cleanup_op is not None:
                sess.run(cleanup_op)

        # This waits for service threads to finish.
        sv.Stop()

        if sv.is_chief:
            logging.info('Finished training! Saving model to disk.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

        return total_loss
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_biasCNN.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        dataset_val = dataset_biasCNN.get_dataset(FLAGS.dataset_name,
                                                  'validation',
                                                  FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        network_fn_val = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing(
            preprocessing_name,
            is_training=True,
            flipLR=FLAGS.flipLR,
            random_scale=FLAGS.random_scale,
            is_windowed=FLAGS.is_windowed)

        image_preprocessing_fn_val = preprocessing_biasCNN.get_preprocessing(
            preprocessing_name,
            is_training=False,
            flipLR=FLAGS.flipLR,
            random_scale=FLAGS.random_scale,
            is_windowed=FLAGS.is_windowed)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

            ############################################
            # Create a provider for the validation set #
            ############################################
            provider_val = slim.dataset_data_provider.DatasetDataProvider(
                dataset_val,
                shuffle=True,
                common_queue_capacity=2 * FLAGS.batch_size_val,
                common_queue_min=FLAGS.batch_size_val)
            [image_val, label_val] = provider_val.get(['image', 'label'])
            label_val -= FLAGS.labels_offset

            eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

            image_val = image_preprocessing_fn_val(image_val, eval_image_size,
                                                   eval_image_size)

            images_val, labels_val = tf.train.batch(
                [image_val, label_val],
                batch_size=FLAGS.batch_size_val,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size_val)

        ###############################
        # Define the model (training) #
        ###############################

        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()

            with tf.variable_scope('my_scope'):
                logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weights=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(
                tf.summary.scalar('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        if FLAGS.quantize_delay >= 0:
            tf.contrib.quantize.create_training_graph(
                quant_delay=FLAGS.quantize_delay)

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                total_num_replicas=FLAGS.worker_replicas,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        #  and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        #################################
        # Define the model (validation) #
        #################################

        with tf.variable_scope('my_scope', reuse=True):
            logits_val, _ = network_fn_val(images_val)

        predictions_val = tf.argmax(logits_val, 1)
        labels_val = tf.squeeze(labels_val)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions_val, labels_val),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits_val, labels_val, 5)
        })

        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection('summaries', op)

        # Gather validation summaries
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # Create a non-default saver so we don't delete all the old checkpoints.
        my_saver = tf_saver.Saver(
            max_to_keep=FLAGS.max_checkpoints_to_keep,
            keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        )

        # Create a non-default dictionary of options for train_step_fn
        # This is a hack that lets us pass everything we need to run evaluation, into the training loop function
        from tensorflow.python.framework import ops
        from tensorflow.python.framework import constant_op
        from tensorflow.python.ops import math_ops

        with ops.name_scope('train_step'):
            train_step_kwargs = {}

            if FLAGS.max_number_of_steps:
                should_stop_op = math_ops.greater_equal(
                    global_step, FLAGS.max_number_of_steps)
            else:
                should_stop_op = constant_op.constant(False)
            train_step_kwargs['should_stop'] = should_stop_op
            if FLAGS.log_every_n_steps > 0:
                train_step_kwargs['should_log'] = math_ops.equal(
                    math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)
            train_step_kwargs['should_val'] = math_ops.equal(
                math_ops.mod(global_step, FLAGS.val_every_n_steps), 0)
            train_step_kwargs['eval_op'] = list(names_to_updates.values())


#    assert(FLAGS.max_number_of_steps==100000)
        print(should_stop_op)
        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None,
            saver=my_saver,
            train_step_fn=learning_biasCNN.train_step_fn,
            train_step_kwargs=train_step_kwargs)
Ejemplo n.º 39
0
 def _shard_indices(self, keys):
   if self._key_dtype == dtypes.string:
     indices = string_ops.string_to_hash_bucket_fast(keys, self._num_shards)
   else:
     indices = math_ops.mod(keys, self._num_shards)
   return math_ops.cast(indices, dtypes.int32)
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to global variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
        global_old = set(n.op.name for n in variables.global_variables())
        apply_updates = self._opt.apply_gradients(grads_and_vars)
        global_new = set(n.op.name for n in variables.global_variables())
        with ops.control_dependencies([apply_updates]):
            local_update = state_ops.assign_add(self._local_step,
                                                1,
                                                name='local_step_update').op

        # this is for place the variables created by optimizer to local collection
        # e.g., AdamOptimizer will create beta as global variables
        def _adjust_optimizer_variable_collection(opt_vars):
            g = ops.get_default_graph()
            idx = 0
            for _ in range(len(
                    g._collections[ops.GraphKeys.GLOBAL_VARIABLES])):
                var = g.get_collection_ref(ops.GraphKeys.GLOBAL_VARIABLES)[idx]
                name = var.op.name
                if name in opt_vars:
                    ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var)
                    del g.get_collection_ref(
                        ops.GraphKeys.GLOBAL_VARIABLES)[idx]
                else:
                    idx += 1

        _adjust_optimizer_variable_collection(global_new - global_old)

        # update global variables.
        def _Update_global_variables():
            local_vars = [v for g, v in grads_and_vars if g is not None]
            global_center_vars = [self._global_map[var] for var in local_vars]
            local_center_vars = [self._local_map[var] for var in local_vars]
            local_center_vars_update = []
            for lvar, var in zip(local_center_vars, global_center_vars):
                local_center_vars_update.append(lvar.assign(var))
            update_ops = []
            differences = []
            with ops.control_dependencies(local_center_vars_update):
                for v, lv in zip(local_vars, local_center_vars):
                    with ops.device(v.device):
                        differences.append(math_ops.subtract(v, lv))
                for lvar, diff in zip(local_vars, differences):
                    with ops.device(lvar.device):
                        update_ops.append(
                            state_ops.assign_sub(
                                lvar,
                                math_ops.multiply(self._moving_rate, diff)))
                for var, diff in zip(global_center_vars, differences):
                    with ops.device(var.device):
                        update_ops.append(
                            state_ops.assign_add(
                                var, math_ops.multiply(self._moving_rate,
                                                       diff)))
                if global_step:
                    with ops.colocate_with(global_step):
                        update_ops.append(state_ops.assign_add(global_step, 1))
            variable_update = control_flow_ops.group(*(update_ops))
            return variable_update

        with ops.control_dependencies([local_update]):
            condition = math_ops.equal(
                math_ops.mod(self._local_step, self._period), 0)
            conditional_update = control_flow_ops.cond(
                condition, _Update_global_variables, control_flow_ops.no_op)
        return conditional_update
Ejemplo n.º 41
0
    def _apply_gradient(self, grad, var, indices=None):
        """The main function to update a variable.

    Args:
      grad: A Tensor containing gradient to apply.
      var: A Tensor containing the variable to update.
      indices: An array of integers, for sparse update.

    Returns:
      Updated variable var = var - learning_rate * preconditioner * grad

    If the gradient is dense, var and grad have the same shape.
    If the update is sparse, then the first dimension of the gradient and var
    may differ, others are all the same. In this case the indices array
    provides the set of indices of the variable which are to be updated with
    each row of the gradient.
    """
        global_step = self._global_step + 1

        # Update accumulated weighted average of gradients
        gbar = self.get_slot(var, "gbar")
        gbar_decay_t = GetParam(self._gbar_decay, global_step)
        gbar_weight_t = GetParam(self._gbar_weight, global_step)
        if indices is not None:
            # Note - the sparse update is not easily implemented, since the
            # algorithm needs all indices of gbar to be updated
            # if mat_gbar_decay != 1 or mat_gbar_decay != 0.
            # One way to make mat_gbar_decay = 1 is by rescaling.
            # If we want the update:
            #         G_{t+1} = a_{t+1} G_t + b_{t+1} w_t
            # define:
            #         r_{t+1} = a_{t+1} * r_t
            #         h_t = G_t / r_t
            # Then:
            #         h_{t+1} = h_t + (b_{t+1} / r_{t+1}) * w_t
            # So we get the mat_gbar_decay = 1 as desired.
            # We can implement this in a future version as needed.
            # However we still need gbar_decay = 0, otherwise all indices
            # of the variable will need to be updated.
            if self._gbar_decay != 0.0:
                tf_logging.warning("Not applying momentum for variable: %s" %
                                   var.name)
            gbar_updated = grad
        else:
            gbar_updated = self._weighted_average(gbar, self._gbar_decay,
                                                  gbar_decay_t,
                                                  gbar_weight_t * grad)

        # Update the preconditioners and compute the preconditioned gradient
        shape = var.get_shape()
        mat_g_list = []
        for i in range(len(shape)):
            mat_g_list.append(self.get_slot(var, "Gbar_" + str(i)))
        mat_gbar_decay_t = GetParam(self._mat_gbar_decay, global_step)
        mat_gbar_weight_t = GetParam(self._mat_gbar_weight, global_step)

        preconditioned_grad = gbar_updated
        v_rank = len(mat_g_list)
        neg_alpha = -GetParam(self._alpha, global_step) / v_rank
        svd_interval = GetParam(self._svd_interval, global_step)
        precond_update_interval = GetParam(self._precond_update_interval,
                                           global_step)
        for i, mat_g in enumerate(mat_g_list):
            # axes is the list of indices to reduce - everything but the current i.
            axes = list(range(i)) + list(range(i + 1, v_rank))
            if shape[i] <= self._max_matrix_size:
                # If the tensor size is sufficiently small perform full Shampoo update
                # Note if precond_update_interval > 1 and mat_gbar_decay_t != 1, this
                # is not strictly correct. However we will use it for now, and
                # fix if needed. (G_1 = aG + bg ==> G_n = a^n G + (1+a+..+a^{n-1})bg)

                # pylint: disable=g-long-lambda,cell-var-from-loop
                mat_g_updated = control_flow_ops.cond(
                    math_ops.mod(global_step, precond_update_interval) < 1,
                    lambda: self._update_mat_g(
                        mat_g, grad, axes, mat_gbar_decay_t, mat_gbar_weight_t
                        * precond_update_interval, i), lambda: mat_g)

                mat_g_updated = mat_g_updated / float(shape[i].value)

                if self._svd_interval == 1:
                    mat_h = self._compute_power(var, mat_g_updated, shape[i],
                                                neg_alpha)
                else:
                    mat_h = control_flow_ops.cond(
                        math_ops.mod(global_step, svd_interval) < 1,
                        lambda: self._compute_power(var, mat_g_updated, shape[
                            i], neg_alpha, "H_" + str(i)),
                        lambda: self.get_slot(var, "H_" + str(i)))

                # mat_h is a square matrix of size d_i x d_i
                # preconditioned_grad is a d_i x ... x d_n x d_0 x ... d_{i-1} tensor
                # After contraction with a d_i x d_i tensor
                # it becomes a d_{i+1} x ... x d_n x d_0 x ... d_i tensor
                # (the first dimension is contracted out, and the second dimension of
                # mat_h is appended).  After going through all the indices, it becomes
                # a d_0 x ... x d_n tensor again.
                preconditioned_grad = math_ops.tensordot(preconditioned_grad,
                                                         mat_h,
                                                         axes=([0], [0]),
                                                         name="precond_" +
                                                         str(i))
            else:
                # Tensor size is too large -- perform diagonal Shampoo update
                # Only normalize non-vector cases.
                if axes:
                    normalizer = 1.0 if indices is not None else float(
                        shape[i].value)
                    grad_outer = math_ops.reduce_sum(grad * grad,
                                                     axis=axes) / normalizer
                else:
                    grad_outer = grad * grad

                if i == 0 and indices is not None:
                    assert self._mat_gbar_decay == 1.0
                    mat_g_updated = state_ops.scatter_add(
                        mat_g, indices, mat_gbar_weight_t * grad_outer)
                    mat_h = math_ops.pow(
                        array_ops.gather(mat_g_updated, indices) +
                        self._epsilon, neg_alpha)
                else:
                    mat_g_updated = self._weighted_average(
                        mat_g, self._mat_gbar_decay, mat_gbar_decay_t,
                        mat_gbar_weight_t * grad_outer)
                    mat_h = math_ops.pow(mat_g_updated + self._epsilon,
                                         neg_alpha)

                # Need to do the transpose to ensure that the tensor becomes
                # a d_{i+1} x ... x d_n x d_0 x ... d_i tensor as described above.
                preconditioned_grad = array_ops.transpose(
                    preconditioned_grad,
                    perm=list(range(1, v_rank)) + [0]) * mat_h

        # Update the variable based on the Shampoo update
        learning_rate_t = GetParam(self._learning_rate, global_step)
        if indices is not None:
            var_updated = state_ops.scatter_add(
                var, indices, -learning_rate_t * preconditioned_grad)
        else:
            var_updated = state_ops.assign_sub(
                var, learning_rate_t * preconditioned_grad)
        return var_updated
Ejemplo n.º 42
0
 def defaults_two():
     return control_flow_ops.cond(math_ops.equal(
         math_ops.mod(x, 2), 0),
                                  multiply,
                                  divide,
                                  name="cond_mult")
Ejemplo n.º 43
0
def rotate_transpose(x, shift, name="rotate_transpose"):
  """Circularly moves dims left or right.

  Effectively identical to:

  ```python
  numpy.transpose(x, numpy.roll(numpy.arange(len(x.shape)), shift))
  ```

  When `validate_args=False` additional graph-runtime checks are
  performed. These checks entail moving data from to GPU to CPU.

  Example:

    ```python
    x = ... # Tensor of shape [1, 2, 3, 4].
    rotate_transpose(x, -1)  # result shape: [2, 3, 4, 1]
    rotate_transpose(x, -2)  # result shape: [3, 4, 1, 2]
    rotate_transpose(x,  1)  # result shape: [4, 1, 2, 3]
    rotate_transpose(x,  2)  # result shape: [3, 4, 1, 2]
    rotate_transpose(x, 7) == rotate_transpose(x, 3)
    rotate_transpose(x, -7) == rotate_transpose(x, -3)
    ```

  Args:
    x: `Tensor`.
    shift: `Tensor`. Number of dimensions to transpose left (shift<0) or
      transpose right (shift>0).
    name: `String`. The name to give this op.

  Returns:
    rotated_x: Input `Tensor` with dimensions circularly rotated by shift.

  Raises:
    TypeError: if shift is not integer type.
  """
  with ops.name_scope(name, values=[x, shift]):
    x = ops.convert_to_tensor(x, name="x")
    shift = ops.convert_to_tensor(shift, name="shift")
    # We do not assign back to preserve constant-ness.
    check_ops.assert_integer(shift)
    shift_value_static = tensor_util.constant_value(shift)
    ndims = x.get_shape().ndims
    if ndims is not None and shift_value_static is not None:
      if ndims < 2: return x
      shift_value_static = np.sign(shift_value_static) * (
          abs(shift_value_static) % ndims)
      if shift_value_static == 0: return x
      perm = np.roll(np.arange(ndims), shift_value_static)
      return array_ops.transpose(x, perm=perm)
    else:
      # Consider if we always had a positive shift, and some specified
      # direction.
      # When shifting left we want the new array:
      #   last(x, n-shift) + first(x, shift)
      # and if shifting right then we want:
      #   last(x, shift) + first(x, n-shift)
      # Observe that last(a) == slice(a, n) and first(a) == slice(0, a).
      # Also, we can encode direction and shift as one: direction * shift.
      # Combining these facts, we have:
      #   a = cond(shift<0, -shift, n-shift)
      #   last(x, n-a) + first(x, a) == x[a:n] + x[0:a]
      # Finally, we transform shift by modulo length so it can be specified
      # independently from the array upon which it operates (like python).
      ndims = array_ops.rank(x)
      shift = array_ops.where(math_ops.less(shift, 0),
                              math_ops.mod(-shift, ndims),
                              ndims - math_ops.mod(shift, ndims))
      first = math_ops.range(0, shift)
      last = math_ops.range(shift, ndims)
      perm = array_ops.concat((last, first), 0)
      return array_ops.transpose(x, perm=perm)
Ejemplo n.º 44
0
 def _shard_indices(self, keys):
   if self._key_dtype == dtypes.string:
     indices = string_ops.string_to_hash_bucket_fast(keys, self._num_shards)
   else:
     indices = math_ops.mod(keys, self._num_shards)
   return math_ops.cast(indices, dtypes.int32)
Ejemplo n.º 45
0
 def f(x1, x2):
   if x1.dtype == dtypes.bool:
     assert x2.dtype == dtypes.bool
     x1 = math_ops.cast(x1, dtypes.int8)
     x2 = math_ops.cast(x2, dtypes.int8)
   return math_ops.mod(x1, x2)
Ejemplo n.º 46
0
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients to variables.

    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer. The chief work updates global
    variables.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the Optimizer constructor.

    Returns:
      A conditional 'Operation' that update both local and global variables or
      just local variables

    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """

    # update local variables
    if not grads_and_vars:
      raise ValueError("Must supply at least one variable")
    if global_step is None:
      raise ValueError("Global step is required")

    apply_updates = self._opt.apply_gradients(grads_and_vars)
    with ops.control_dependencies([apply_updates]):
      local_update = state_ops.assign_add(
          self._local_step, 1, name="local_step_update").op

    # update global variables.
    def _update_global_variables():  # pylint: disable=missing-docstring
      local_vars = [v for g, v in grads_and_vars if g is not None]
      global_vars = [self._local_2_global[v] for v in local_vars]
      # sync queue
      with ops.colocate_with(global_step):
        sync_queue = data_flow_ops.FIFOQueue(
            -1, [dtypes.bool], shapes=[[]], shared_name="sync_queue")
      train_ops = []
      aggregated_vars = []
      with ops.name_scope(None, self._name + "/global"):
        for var, gvar in zip(local_vars, global_vars):
          # pylint: disable=protected-access
          with ops.device(gvar.device):
            if isinstance(var._ref(), ops.Tensor):
              var_accum = data_flow_ops.ConditionalAccumulator(
                  var.dtype,
                  shape=var.get_shape(),
                  shared_name=gvar.name + "/var_accum")
              train_ops.append(
                  var_accum.apply_grad(var._ref(), local_step=global_step))
              aggregated_vars.append(var_accum.take_grad(self._num_worker))
            else:
              raise ValueError("Unknown local variable type!")
            self._accumulator_list.append((var_accum, gvar.device))
      # chief worker updates global vars and enqueues tokens to the sync queue
      if self._is_chief:
        update_ops = []
        with ops.control_dependencies(train_ops):
          for avg_var, gvar in zip(aggregated_vars, global_vars):
            with ops.device(gvar.device):
              update_ops.append(state_ops.assign(gvar, avg_var))
          with ops.device(global_step.device):
            update_ops.append(state_ops.assign_add(global_step, 1))
        with ops.control_dependencies(update_ops), ops.device(
            global_step.device):
          tokens = array_ops.fill([self._num_worker - 1],
                                  constant_op.constant(False))
          sync_op = sync_queue.enqueue_many(tokens)
      else:
        with ops.control_dependencies(train_ops), ops.device(
            global_step.device):
          sync_op = sync_queue.dequeue()

      with ops.control_dependencies([sync_op]):
        local_update_op = self._local_vars_update(local_vars)
      return local_update_op

    with ops.control_dependencies([local_update]):
      condition = math_ops.equal(
          math_ops.mod(self._local_step, self._interval_steps), 0)
      conditional_update = control_flow_ops.cond(
          condition, _update_global_variables, control_flow_ops.no_op)

    chief_init_ops = []
    for accum, dev in self._accumulator_list:
      with ops.device(dev):
        chief_init_ops.append(
            accum.set_global_step(global_step, name="SetGlobalStep"))
    self._chief_init_op = control_flow_ops.group(*(chief_init_ops))

    return conditional_update
def main(_):
    #tf.disable_v2_behavior() ###
    tf.compat.v1.disable_eager_execution()
    tf.compat.v1.enable_resource_variables()

    # Enable habana bf16 conversion pass
    if FLAGS.dtype == 'bf16':
        os.environ['TF_BF16_CONVERSION'] = flags.FLAGS.bf16_config_path
        FLAGS.precision = 'bf16'
    else:
        os.environ['TF_BF16_CONVERSION'] = "0"

    if FLAGS.use_horovod:
        hvd_init()

    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name,
            is_training=True,
            use_grayscale=FLAGS.use_grayscale)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weights=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Gather initial summaries.

        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs

        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(
                tf.summary.scalar('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #if FLAGS.quantize_delay >= 0:
        #  quantize.create_training_graph(quant_delay=FLAGS.quantize_delay) #for debugging!!

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                total_num_replicas=FLAGS.worker_replicas,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        #  and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        if horovod_enabled():
            hvd.broadcast_global_variables(0)
        ###########################
        # Kicks off the training. #
        ###########################
        with dump_callback():
            with logger.benchmark_context(FLAGS):
                eps1 = ExamplesPerSecondKerasHook(FLAGS.log_every_n_steps,
                                                  output_dir=FLAGS.train_dir,
                                                  batch_size=FLAGS.batch_size)

                write_hparams_v1(
                    eps1.writer, {
                        'batch_size': FLAGS.batch_size,
                        **{x: getattr(FLAGS, x)
                           for x in FLAGS}
                    })

                train_step_kwargs = {}
                if FLAGS.max_number_of_steps:
                    should_stop_op = math_ops.greater_equal(
                        global_step, FLAGS.max_number_of_steps)
                else:
                    should_stop_op = constant_op.constant(False)
                train_step_kwargs['should_stop'] = should_stop_op
                if FLAGS.log_every_n_steps > 0:
                    train_step_kwargs['should_log'] = math_ops.equal(
                        math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)

                eps1.on_train_begin()
                train_step_kwargs['EPS'] = eps1

                slim.learning.train(
                    train_tensor,
                    logdir=FLAGS.train_dir,
                    train_step_fn=train_step1,
                    train_step_kwargs=train_step_kwargs,
                    master=FLAGS.master,
                    is_chief=(FLAGS.task == 0),
                    init_fn=_get_init_fn(),
                    summary_op=summary_op,
                    summary_writer=None,
                    number_of_steps=FLAGS.max_number_of_steps,
                    log_every_n_steps=FLAGS.log_every_n_steps,
                    save_summaries_secs=FLAGS.save_summaries_secs,
                    save_interval_secs=FLAGS.save_interval_secs,
                    sync_optimizer=optimizer if FLAGS.sync_replicas else None)
Ejemplo n.º 48
0
def train(train_op,
          logdir,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None,
          session_wrapper=None,
          trace_every_n_steps=None,
          ignore_live_threads=False):
  """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronous.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to. If None, model
      checkpoints and summaries will not be written.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must have take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By
      default, two `Boolean`, scalar ops called "should_stop" and "should_log"
      are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step are logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The address of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the primary
      replica during replica training.
    global_step: The `Tensor` representing the global step. If left as `None`,
      then training_util.get_or_create_global_step(), that is,
      tf.contrib.framework.global_step() is used.
    number_of_steps: The max number of gradient steps to take during training,
      as measured by 'global_step': training will stop if global_step is
      greater than 'number_of_steps'. If the value is left as None, training
      proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling `tf.global_variables_initializer()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its default
      value, then the session is initialized by calling
      `tf.local_variables_initializer()` and `tf.tables_initializer()`.
    init_fn: An optional callable to be executed after `init_op` is called. The
      callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to its
      default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    summary_writer: `SummaryWriter` to use.  Can be `None`
      to indicate that no summaries should be written. If unset, we
      create a SummaryWriter.
    startup_delay_steps: The number of steps to wait for before beginning. Note
      that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer, or a list of
      them. If the argument is supplied, gradient updates will be synchronous.
      If left as `None`, gradient updates will be asynchronous.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    session_wrapper: A function that takes a `tf.Session` object as the only
      argument and returns a wrapped session object that has the same methods
      that the original object has, or `None`. Iff not `None`, the wrapped
      object will be used for training.
    trace_every_n_steps: produce and save a `Timeline` in Chrome trace format
      and add it to the summaries every `trace_every_n_steps`. If None, no trace
      information will be produced or saved.
    ignore_live_threads: If `True` ignores threads that remain running after
      a grace period when stopping the supervisor, instead of raising a
      RuntimeError.

  Returns:
    the value of the loss function after training.

  Raises:
    ValueError: if `train_op` is empty or if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, if `number_of_steps` is
      negative, or if `trace_every_n_steps` is not `None` and no `logdir` is
      provided.
  """
  if train_op is None:
    raise ValueError('train_op cannot be None.')

  if logdir is None:
    if summary_op != _USE_DEFAULT:
      raise ValueError('Cannot provide summary_op because logdir=None')
    if saver is not None:
      raise ValueError('Cannot provide saver because logdir=None')
    if trace_every_n_steps is not None:
      raise ValueError('Cannot provide trace_every_n_steps because '
                       'logdir=None')

  if isinstance(sync_optimizer, sync_replicas_optimizer.SyncReplicasOptimizer):
    sync_optimizer = [sync_optimizer]
  if sync_optimizer is not None and startup_delay_steps > 0:
    raise ValueError(
        'startup_delay_steps must be zero when sync_optimizer is supplied.')

  if number_of_steps is not None and number_of_steps <= 0:
    raise ValueError(
        '`number_of_steps` must be either None or a positive number.')

  graph = graph or ops.get_default_graph()
  with graph.as_default():
    if global_step is None:
      global_step = training_util.get_or_create_global_step()
    saver = saver or tf_saver.Saver()

    if sync_optimizer is not None:
      for opt in sync_optimizer:
        if not isinstance(opt, sync_replicas_optimizer.SyncReplicasOptimizer):
          raise ValueError(
              '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer.')

    with ops.name_scope('init_ops'):
      if init_op == _USE_DEFAULT:
        init_op = variables.global_variables_initializer()

      if ready_op == _USE_DEFAULT:
        ready_op = variables.report_uninitialized_variables()

      if local_init_op == _USE_DEFAULT:
        local_init_op = control_flow_ops.group(
            variables.local_variables_initializer(),
            lookup_ops.tables_initializer())

      if sync_optimizer is not None and isinstance(sync_optimizer, list):
        with ops.control_dependencies([local_init_op] if local_init_op is
                                      not None else []):
          if is_chief:
            local_init_op = control_flow_ops.group(
                *[opt.chief_init_op for opt in sync_optimizer])
          else:
            local_init_op = control_flow_ops.group(
                *[opt.local_step_init_op for opt in sync_optimizer])
        ready_for_local_init_op = control_flow_ops.group(
            *[opt.ready_for_local_init_op for opt in sync_optimizer])
      else:
        ready_for_local_init_op = None

    if summary_op == _USE_DEFAULT:
      summary_op = summary.merge_all()

    if summary_writer == _USE_DEFAULT:
      summary_writer = supervisor.Supervisor.USE_DEFAULT

    if is_chief and sync_optimizer is not None:
      # Need to create these BEFORE the supervisor finalizes the graph:
      init_tokens_op = [opt.get_init_tokens_op() for opt in sync_optimizer]
      chief_queue_runner = [
          opt.get_chief_queue_runner() for opt in sync_optimizer]

    if train_step_kwargs == _USE_DEFAULT:
      with ops.name_scope('train_step'):
        train_step_kwargs = {}

        if number_of_steps:
          should_stop_op = math_ops.greater_equal(global_step, number_of_steps)
        else:
          should_stop_op = constant_op.constant(False)
        train_step_kwargs['should_stop'] = should_stop_op
        if log_every_n_steps > 0:
          train_step_kwargs['should_log'] = math_ops.equal(
              math_ops.mod(global_step, log_every_n_steps), 0)
        if is_chief and trace_every_n_steps is not None:
          train_step_kwargs['should_trace'] = math_ops.equal(
              math_ops.mod(global_step, trace_every_n_steps), 0)
          train_step_kwargs['logdir'] = logdir

  sv = supervisor.Supervisor(
      graph=graph,
      is_chief=is_chief,
      logdir=logdir,
      init_op=init_op,
      init_feed_dict=init_feed_dict,
      local_init_op=local_init_op,
      ready_for_local_init_op=ready_for_local_init_op,
      ready_op=ready_op,
      summary_op=summary_op,
      summary_writer=summary_writer,
      global_step=global_step,
      saver=saver,
      save_summaries_secs=save_summaries_secs,
      save_model_secs=save_interval_secs,
      init_fn=init_fn)

  if summary_writer is not None:
    train_step_kwargs['summary_writer'] = sv.summary_writer

  total_loss = None
  should_retry = True
  while should_retry:
    try:
      should_retry = False
      with sv.managed_session(
          master, start_standard_services=False, config=session_config) as sess:
        logging.info('Starting Session.')
        if session_wrapper is not None:
          logging.info(
              'Wrapping session with wrapper function: %s', session_wrapper)
          sess = session_wrapper(sess)
        if is_chief:
          if logdir:
            sv.start_standard_services(sess)
        elif startup_delay_steps > 0:
           # (use sys.maxsize because sys.maxint doesn't exist in Python 3)
          _wait_for_step(sess, global_step,
                         min(startup_delay_steps, number_of_steps or
                             sys.maxsize))
        threads = sv.start_queue_runners(sess)
        logging.info('Starting Queues.')
        if is_chief and sync_optimizer is not None:
          sv.start_queue_runners(sess, chief_queue_runner)
          sess.run(init_tokens_op)
        try:
          while not sv.should_stop():
            total_loss, should_stop = train_step_fn(
                sess, train_op, global_step, train_step_kwargs)
            if should_stop:
              logging.info('Stopping Training.')
              sv.request_stop()
              break
        except errors.OutOfRangeError as e:
          # OutOfRangeError is thrown when epoch limit per
          # tf.train.limit_epochs is reached.
          logging.info('Caught OutOfRangeError. Stopping Training. %s', e)
        if logdir and sv.is_chief:
          logging.info('Finished training! Saving model to disk.')
          sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
          sv.stop(
              threads,
              close_summary_writer=True,
              ignore_live_threads=ignore_live_threads)

    except errors.AbortedError:
      # Always re-run on AbortedError as it indicates a restart of one of the
      # distributed tensorflow servers.
      logging.info('Retrying training!')
      should_retry = True

  return total_loss
Ejemplo n.º 49
0
 def filter_fn(elem_index, _):
     mod_result = math_ops.mod(elem_index, task_spec.num_workers)
     return math_ops.equal(mod_result, task_spec.index)
Ejemplo n.º 50
0
def train(train_op,
          logdir,
          metric_op=None,
          metric_collection_name=None,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_samples=None,
          number_of_steps=None,
          number_of_epochs=None,
          batch_size=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None,
          trace_every_n_steps=None):
    """Runs a training loop using a TensorFlow supervisor.

    When the sync_optimizer is supplied, gradient updates are applied
    synchronously. Otherwise, gradient updates are applied asynchronous.

    Args:
      train_op: A `Tensor` that, when executed, will apply the gradients and
        return the loss value.

      metric_op: A `Tensor` that, when executed, will update the streaming_metrics ops.
      metric_collection_name: The name associated with the metric_op.
      logdir: The directory where training logs are written to. If None, model
        checkpoints and summaries will not be written.
      train_step_fn: The function to call in order to execute a single gradient
        step. The function must have take exactly four arguments: the current
        session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary.
      train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By
        default, two `Boolean`, scalar ops called "should_stop" and "should_log"
        are provided.
      log_every_n_steps: The frequency, in terms of global steps, that the loss
        and global step and logged.
      graph: The graph to pass to the supervisor. If no graph is supplied the
        default graph is used.
      master: The address of the tensorflow master.
      is_chief: Specifies whether or not the training is being run by the primary
        replica during replica training.
      global_step: The `Tensor` representing the global step. If left as `None`,
        then slim.variables.get_or_create_global_step() is used.
      number_of_steps: The max number of gradient steps to take during training,
        as measured by 'global_step': training will stop if global_step is
        greater than 'number_of_steps'. If the value is left as None, training
        proceeds indefinitely.
      number_of_epochs: The total number of epochs per training.
      batch_size: The number of samples in each batch.
      init_op: The initialization operation. If left to its default value, then
        the session is initialized by calling `tf.global_variables_initializer()`.
      init_feed_dict: A feed dictionary to use when executing the `init_op`.
      local_init_op: The local initialization operation. If left to its default
        value, then the session is initialized by calling
        `tf.local_variables_initializer()` and `tf.tables_initializer()`.
      init_fn: An optional callable to be executed after `init_op` is called. The
        callable must accept one argument, the session being initialized.
      ready_op: Operation to check if the model is ready to use. If left to its
        default value, then the session checks for readiness by calling
        `tf.report_uninitialized_variables()`.
      summary_op: The summary operation.
      save_summaries_secs: How often, in seconds, to save summaries.
      summary_writer: `SummaryWriter` to use.  Can be `None`
        to indicate that no summaries should be written. If unset, we
        create a SummaryWriter.
      startup_delay_steps: The number of steps to wait for before beginning. Note
        that this must be 0 if a sync_optimizer is supplied.
      saver: Saver to save checkpoints. If None, a default one will be created
        and used.
      save_interval_secs: How often, in seconds, to save the model to `logdir`.
      sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
        argument is supplied, gradient updates will be synchronous. If left as
        `None`, gradient updates will be asynchronous.
      session_config: An instance of `tf.ConfigProto` that will be used to
        configure the `Session`. If left as `None`, the default will be used.
      trace_every_n_steps: produce and save a `Timeline` in Chrome trace format
        and add it to the summaries every `trace_every_n_steps`. If None, no trace
        information will be produced or saved.

    Returns:
      the value of the loss function after training.

    Raises:
      ValueError: if `train_op` is empty or if `startup_delay_steps` is
        non-zero when `sync_optimizer` is supplied, if `number_of_steps` is
        negative, or if `trace_every_n_steps` is not `None` and no `logdir` is
        provided.
    """

    # Check if the calculation of some metrics is desired.
    if metric_op is not None:

        # Check the necessary requirements
        if metric_collection_name is None:
            raise ValueError('metric_collection_name must be fed and cannot be No')
        if number_of_samples is None:
            raise ValueError('number_of_samples must be fed and cannot be No')
        if number_of_steps is None:
            raise ValueError('number_of_steps must be fed and cannot be No')
        if number_of_epochs is None:
            raise ValueError('number_of_epochs must be fed and cannot be No')
        if batch_size is None:
            raise ValueError('batch_size must be fed and cannot be None')

    if train_op is None:
        raise ValueError('train_op cannot be None.')

    if logdir is None:
        if summary_op != _USE_DEFAULT:
            raise ValueError('Cannot provide summary_op because logdir=None')
        if saver is not None:
            raise ValueError('Cannot provide saver because logdir=None')
        if trace_every_n_steps is not None:
            raise ValueError('Cannot provide trace_every_n_steps because '
                             'logdir=None')

    if sync_optimizer is not None and startup_delay_steps > 0:
        raise ValueError(
            'startup_delay_steps must be zero when sync_optimizer is supplied.')

    if number_of_steps is not None and number_of_steps <= 0:
        raise ValueError(
            '`number_of_steps` must be either None or a positive number.')

    graph = graph or ops.get_default_graph()
    with graph.as_default():
        if global_step is None:
            global_step = variables.get_or_create_global_step()
        saver = saver or tf_saver.Saver()

        with ops.name_scope('init_ops'):
            if init_op == _USE_DEFAULT:
                init_op = tf_variables.global_variables_initializer()

            if ready_op == _USE_DEFAULT:
                ready_op = tf_variables.report_uninitialized_variables()

            if local_init_op == _USE_DEFAULT:
                local_init_op = control_flow_ops.group(
                    tf_variables.local_variables_initializer(),
                    lookup_ops.tables_initializer())

            if sync_optimizer is not None and isinstance(
                    sync_optimizer, sync_replicas_optimizer.SyncReplicasOptimizer):
                with ops.control_dependencies([local_init_op] if local_init_op is
                not None else []):
                    if is_chief:
                        local_init_op = sync_optimizer.chief_init_op
                    else:
                        local_init_op = sync_optimizer.local_step_init_op
                ready_for_local_init_op = sync_optimizer.ready_for_local_init_op
            else:
                ready_for_local_init_op = None

        if summary_op == _USE_DEFAULT:
            summary_op = summary.merge_all()

        if summary_writer == _USE_DEFAULT:
            summary_writer = supervisor.Supervisor.USE_DEFAULT

        if is_chief and sync_optimizer is not None:
            if not isinstance(sync_optimizer,
                              (sync_replicas_optimizer.SyncReplicasOptimizer)):
                raise ValueError(
                    '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer.')

            # Need to create these BEFORE the supervisor finalizes the graph:
            init_tokens_op = sync_optimizer.get_init_tokens_op()
            chief_queue_runner = sync_optimizer.get_chief_queue_runner()

        if train_step_kwargs == _USE_DEFAULT:
            with ops.name_scope('train_step'):
                train_step_kwargs = {}

                if number_of_steps:
                    should_stop_op = math_ops.greater_equal(global_step, number_of_steps)
                else:
                    should_stop_op = constant_op.constant(False)
                train_step_kwargs['should_stop'] = should_stop_op
                train_step_kwargs['should_log'] = math_ops.equal(
                    math_ops.mod(global_step, log_every_n_steps), 0)
                if is_chief and trace_every_n_steps is not None:
                    train_step_kwargs['should_trace'] = math_ops.equal(
                        math_ops.mod(global_step, trace_every_n_steps), 0)
                    train_step_kwargs['logdir'] = logdir
                if number_of_samples is not None and batch_size is not None:
                    train_step_kwargs['num_batches_per_epoch'] = int(number_of_samples / float(batch_size))
                if number_of_samples is not None and batch_size is not None:
                    train_step_kwargs['num_steps_per_epoch'] = int(number_of_steps / float(number_of_epochs))

        # If metric calculation is desired.
        if metric_op is not None:
            # The reset_op is defined for resetting the streaming_variables(streaming_acurracy,...)
            # The reason for defining it here is that the supervisor finalized the graph and the graph will be fixed after it.
            # By calling the reset_op in the train_step function, after each epoch the total & count variables will reset to zero.
            # This help to have the averaged accuracy per epoch which is useful to realize if we are getting to the highest accuracy in the training.
            stream_vars = [i for i in tf.local_variables() if i.name.split('/')[1] == metric_collection_name]
            reset_op = tf.variables_initializer(stream_vars)

    sv = supervisor.Supervisor(
        graph=graph,
        is_chief=is_chief,
        logdir=logdir,
        init_op=init_op,
        init_feed_dict=init_feed_dict,
        local_init_op=local_init_op,
        ready_for_local_init_op=ready_for_local_init_op,
        ready_op=ready_op,
        summary_op=summary_op,
        summary_writer=summary_writer,
        global_step=global_step,
        saver=saver,
        save_summaries_secs=save_summaries_secs,
        save_model_secs=save_interval_secs,
        init_fn=init_fn)

    if summary_writer is not None:
        train_step_kwargs['summary_writer'] = sv.summary_writer

    should_retry = True
    while should_retry:
        try:
            should_retry = False

            with sv.managed_session(
                    master, start_standard_services=False, config=session_config) as sess:

                logging.info('Starting Session.')
                if is_chief:
                    if logdir:
                        sv.start_standard_services(sess)
                elif startup_delay_steps > 0:
                    _wait_for_step(sess, global_step,
                                   min(startup_delay_steps, number_of_steps or
                                       sys.maxint))
                sv.start_queue_runners(sess)
                logging.info('Starting Queues.')
                if is_chief and sync_optimizer is not None:
                    sv.start_queue_runners(sess, [chief_queue_runner])
                    sess.run(init_tokens_op)
                try:
                    while not sv.should_stop():
                        if metric_op is not None:
                            total_loss, should_stop = train_step_fn(
                                sess, train_op, global_step, train_step_kwargs, metric_op, reset_op)
                        else:
                            total_loss, should_stop = train_step_fn(
                                sess, train_op, global_step, train_step_kwargs)
                        if should_stop:
                            logging.info('Stopping Training.')
                            break
                except errors.OutOfRangeError:
                    # OutOfRangeError is thrown when epoch limit per
                    # tf.train.limit_epochs is reached.
                    logging.info('Caught OutOfRangeError. Stopping Training.')
                if logdir and sv.is_chief:
                    logging.info('Finished training! Saving model to disk.')
                    sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

        except errors.AbortedError:
            # Always re-run on AbortedError as it indicates a restart of one of the
            # distributed tensorflow servers.
            logging.info('Retrying training!')
            should_retry = True

    return total_loss
Ejemplo n.º 51
0
 def mod():
   with ops.device('/device:GPU:0'):
     a = constant_op.constant(1.0)
     b = constant_op.constant(1.0)
     return math_ops.mod(a, b)
Ejemplo n.º 52
0
def atrous_conv2d(value, filters, rate, padding, name=None):
    """Atrous convolution (a.k.a. convolution with holes or dilated convolution).

  Computes a 2-D atrous convolution, also known as convolution with holes or
  dilated convolution, given 4-D `value` and `filters` tensors. If the `rate`
  parameter is equal to one, it performs regular 2-D convolution. If the `rate`
  parameter is greater than one, it performs convolution with holes, sampling
  the input values every `rate` pixels in the `height` and `width` dimensions.
  This is equivalent to convolving the input with a set of upsampled filters,
  produced by inserting `rate - 1` zeros between two consecutive values of the
  filters along the `height` and `width` dimensions, hence the name atrous
  convolution or convolution with holes (the French word trous means holes in
  English).

  More specifically:

      output[b, i, j, k] = sum_{di, dj, q} filters[di, dj, q, k] *
            value[b, i + rate * di, j + rate * dj, q]

  Atrous convolution allows us to explicitly control how densely to compute
  feature responses in fully convolutional networks. Used in conjunction with
  bilinear interpolation, it offers an alternative to `conv2d_transpose` in
  dense prediction tasks such as semantic image segmentation, optical flow
  computation, or depth estimation. It also allows us to effectively enlarge
  the field of view of filters without increasing the number of parameters or
  the amount of computation.

  For a description of atrous convolution and how it can be used for dense
  feature extraction, please see: [Semantic Image Segmentation with Deep
  Convolutional Nets and Fully Connected CRFs](http://arxiv.org/abs/1412.7062).
  The same operation is investigated further in [Multi-Scale Context Aggregation
  by Dilated Convolutions](http://arxiv.org/abs/1511.07122). Previous works
  that effectively use atrous convolution in different ways are, among others,
  [OverFeat: Integrated Recognition, Localization and Detection using
  Convolutional Networks](http://arxiv.org/abs/1312.6229) and [Fast Image
  Scanning with Deep Max-Pooling Convolutional Neural Networks]
  (http://arxiv.org/abs/1302.1700). Atrous convolution is also closely related
  to the so-called noble identities in multi-rate signal processing.

  There are many different ways to implement atrous convolution (see the refs
  above). The implementation here reduces

      atrous_conv2d(value, filters, rate, padding=padding)

  to the following three operations:

      paddings = ...
      net = space_to_batch(value, paddings, block_size=rate)
      net = conv2d(net, filters, strides=[1, 1, 1, 1], padding="VALID")
      crops = ...
      net = batch_to_space(net, crops, block_size=rate)

  Advanced usage. Note the following optimization: A sequence of `atrous_conv2d`
  operations with identical `rate` parameters, 'SAME' `padding`, and filters
  with odd heights/ widths:

      net = atrous_conv2d(net, filters1, rate, padding="SAME")
      net = atrous_conv2d(net, filters2, rate, padding="SAME")
      ...
      net = atrous_conv2d(net, filtersK, rate, padding="SAME")

  can be equivalently performed cheaper in terms of computation and memory as:

      pad = ...  # padding so that the input dims are multiples of rate
      net = space_to_batch(net, paddings=pad, block_size=rate)
      net = conv2d(net, filters1, strides=[1, 1, 1, 1], padding="SAME")
      net = conv2d(net, filters2, strides=[1, 1, 1, 1], padding="SAME")
      ...
      net = conv2d(net, filtersK, strides=[1, 1, 1, 1], padding="SAME")
      net = batch_to_space(net, crops=pad, block_size=rate)

  because a pair of consecutive `space_to_batch` and `batch_to_space` ops with
  the same `block_size` cancel out when their respective `paddings` and `crops`
  inputs are identical.

  Args:
    value: A 4-D `Tensor` of type `float`. It needs to be in the default "NHWC"
      format. Its shape is `[batch, in_height, in_width, in_channels]`.
    filters: A 4-D `Tensor` with the same type as `value` and shape
      `[filter_height, filter_width, in_channels, out_channels]`. `filters`'
      `in_channels` dimension must match that of `value`. Atrous convolution is
      equivalent to standard convolution with upsampled filters with effective
      height `filter_height + (filter_height - 1) * (rate - 1)` and effective
      width `filter_width + (filter_width - 1) * (rate - 1)`, produced by
      inserting `rate - 1` zeros along consecutive elements across the
      `filters`' spatial dimensions.
    rate: A positive int32. The stride with which we sample input values across
      the `height` and `width` dimensions. Equivalently, the rate by which we
      upsample the filter values by inserting zeros across the `height` and
      `width` dimensions. In the literature, the same parameter is sometimes
      called `input stride` or `dilation`.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
    name: Optional name for the returned tensor.

  Returns:
    A `Tensor` with the same type as `value`.

  Raises:
    ValueError: If input/output depth does not match `filters`' shape, or if
      padding is other than `'VALID'` or `'SAME'`.
  """
    with ops.op_scope([value, filters], name, "atrous_conv2d") as name:
        value = ops.convert_to_tensor(value, name="value")
        filters = ops.convert_to_tensor(filters, name="filters")
        value_shape = value.get_shape()
        filter_shape = filters.get_shape()
        if not value_shape[3].is_compatible_with(filter_shape[2]):
            raise ValueError(
                "value's input channels does not match filters' input channels, "
                "{} != {}".format(value_shape[3], filter_shape[2]))
        if rate < 1:
            raise ValueError("rate {} cannot be less than one".format(rate))

        if rate == 1:
            value = gen_nn_ops.conv2d(input=value,
                                      filter=filters,
                                      strides=[1, 1, 1, 1],
                                      padding=padding)
            return value

        # We have two padding contributions. The first is used for converting "SAME"
        # to "VALID". The second is required so that the height and width of the
        # zero-padded value tensor are multiples of rate.

        # Spatial dimensions of original input
        value_shape = array_ops.shape(value)
        in_height = value_shape[1]
        in_width = value_shape[2]

        # Spatial dimensions of the filters and the upsampled filters in which we
        # introduce (rate - 1) zeros between consecutive filter values.
        filter_height = int(filter_shape[0])
        filter_width = int(filter_shape[1])
        filter_height_up = filter_height + (filter_height - 1) * (rate - 1)
        filter_width_up = filter_width + (filter_width - 1) * (rate - 1)

        # Padding required to reduce to "VALID" convolution
        if padding == "SAME":
            pad_height = filter_height_up - 1
            pad_width = filter_width_up - 1
        elif padding == "VALID":
            pad_height = 0
            pad_width = 0
        else:
            raise ValueError("Invalid padding")
        # When padding is "SAME" and the pad_height (pad_width) is odd, we pad more
        # to bottom (right), following the same convention as conv2d().
        pad_top = math_ops.floordiv(pad_height, 2)
        pad_bottom = pad_height - pad_top
        pad_left = math_ops.floordiv(pad_width, 2)
        pad_right = pad_width - pad_left

        # More padding so that rate divides the height and width of the input value
        in_height = in_height + pad_top + pad_bottom
        in_width = in_width + pad_left + pad_right

        mod_height = math_ops.mod(in_height, rate)
        mod_width = math_ops.mod(in_width, rate)
        null = constant_op.constant(0)
        pad_bottom_extra = control_flow_ops.cond(
            gen_math_ops.equal(mod_height, 0), lambda: null,
            lambda: rate - mod_height)
        pad_right_extra = control_flow_ops.cond(
            gen_math_ops.equal(mod_width, 0), lambda: null,
            lambda: rate - mod_width)

        # The paddings argument to space_to_batch includes both padding components
        pad_bottom = pad_bottom + pad_bottom_extra
        pad_right = pad_right + pad_right_extra

        space_to_batch_pad = [[pad_top, pad_bottom], [pad_left, pad_right]]

        value = array_ops.space_to_batch(input=value,
                                         paddings=space_to_batch_pad,
                                         block_size=rate)

        value = gen_nn_ops.conv2d(input=value,
                                  filter=filters,
                                  strides=[1, 1, 1, 1],
                                  padding="VALID",
                                  name=name)

        # The crops argument to batch_to_space is just the extra padding component
        batch_to_space_crop = [[0, pad_bottom_extra], [0, pad_right_extra]]
        value = array_ops.batch_to_space(input=value,
                                         crops=batch_to_space_crop,
                                         block_size=rate)

        return value
Ejemplo n.º 53
0
def rotate_transpose(x, shift, name="rotate_transpose"):
    """Circularly moves dims left or right.

  Effectively identical to:

  ```python
  numpy.transpose(x, numpy.roll(numpy.arange(len(x.shape)), shift))
  ```

  When `validate_args=False` additional graph-runtime checks are
  performed. These checks entail moving data from to GPU to CPU.

  Example:

    ```python
    x = ...  # Tensor of shape [1, 2, 3, 4].
    rotate_transpose(x, -1)  # result shape: [2, 3, 4, 1]
    rotate_transpose(x, -2)  # result shape: [3, 4, 1, 2]
    rotate_transpose(x,  1)  # result shape: [4, 1, 2, 3]
    rotate_transpose(x,  2)  # result shape: [3, 4, 1, 2]
    rotate_transpose(x, 7) == rotate_transpose(x, 3)
    rotate_transpose(x, -7) == rotate_transpose(x, -3)
    ```

  Args:
    x: `Tensor`.
    shift: `Tensor`. Number of dimensions to transpose left (shift<0) or
      transpose right (shift>0).
    name: Python `str`. The name to give this op.

  Returns:
    rotated_x: Input `Tensor` with dimensions circularly rotated by shift.

  Raises:
    TypeError: if shift is not integer type.
  """
    with ops.name_scope(name, values=[x, shift]):
        x = ops.convert_to_tensor(x, name="x")
        shift = ops.convert_to_tensor(shift, name="shift")
        # We do not assign back to preserve constant-ness.
        check_ops.assert_integer(shift)
        shift_value_static = tensor_util.constant_value(shift)
        ndims = x.get_shape().ndims
        if ndims is not None and shift_value_static is not None:
            if ndims < 2: return x
            shift_value_static = np.sign(shift_value_static) * (
                abs(shift_value_static) % ndims)
            if shift_value_static == 0: return x
            perm = np.roll(np.arange(ndims), shift_value_static)
            return array_ops.transpose(x, perm=perm)
        else:
            # Consider if we always had a positive shift, and some specified
            # direction.
            # When shifting left we want the new array:
            #   last(x, n-shift) + first(x, shift)
            # and if shifting right then we want:
            #   last(x, shift) + first(x, n-shift)
            # Observe that last(a) == slice(a, n) and first(a) == slice(0, a).
            # Also, we can encode direction and shift as one: direction * shift.
            # Combining these facts, we have:
            #   a = cond(shift<0, -shift, n-shift)
            #   last(x, n-a) + first(x, a) == x[a:n] + x[0:a]
            # Finally, we transform shift by modulo length so it can be specified
            # independently from the array upon which it operates (like python).
            ndims = array_ops.rank(x)
            shift = array_ops.where(math_ops.less(shift, 0),
                                    math_ops.mod(-shift, ndims),
                                    ndims - math_ops.mod(shift, ndims))
            first = math_ops.range(0, shift)
            last = math_ops.range(shift, ndims)
            perm = array_ops.concat([last, first], 0)
            return array_ops.transpose(x, perm=perm)
Ejemplo n.º 54
0
def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
    
    dataset_val = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, 'validation', FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################
    
    if FLAGS.weights_initializer is None:
      weights_initializer = None
      # default value will be defined in argscope, it is xavier_initializer
    elif FLAGS.weights_initializer=='zeros':
      weights_initializer = tf.zeros_initializer()
    elif FLAGS.weights_initializer=='ones':
      weights_initializer = tf.ones_initializer()
    elif FLAGS.weights_initializer=='trunc_normal':
      weights_initializer = tf.truncated_normal_initializer()
    elif FLAGS.weights_initializer=='xavier':
      weights_initializer = initializers.xavier_initializer()
    elif FLAGS.weights_initializer=='var_scaling':
      weights_initializer = initializers.variance_scaling_initializer()
    else:
      raise ValueError('weight initializer not found')
      
    if FLAGS.biases_initializer is None:
      biases_initializer = None
      # default value will be defined in argscope, it is zeros_initializer
    elif biases_initializer=='zeros':
       biases_initializer = tf.zeros_initializer()
    elif FLAGS.biases_initializer=='ones':
       biases_initializer = tf.ones_initializer()
    elif FLAGS.biases_initializer=='trunc_normal':
      biases_initializer = tf.truncated_normal_initializer()
    elif FLAGS.biases_initializer=='xavier':
      biases_initializer = initializers.xavier_initializer()
    elif FLAGS.biases_initializer=='var_scaling':
      biases_initializer = initializers.variance_scaling_initializer()
    else:
      raise ValueError('biases initializer not found')
    
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        weights_initializer=weights_initializer,
        biases_initializer=biases_initializer,
        is_training=True)

    network_fn_val = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weights_initializer=weights_initializer,
        biases_initializer=biases_initializer,
        is_training=False)
    
    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name,
        is_training=True, flipLR = FLAGS.flipLR, random_scale = FLAGS.random_scale, 
	is_windowed = FLAGS.is_windowed)

    image_preprocessing_fn_val = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name,
        is_training=False, flipLR = FLAGS.flipLR, random_scale = FLAGS.random_scale, 
	is_windowed=FLAGS.is_windowed)
    
    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size, train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)
      
      ############################################
      # Create a provider for the validation set #
      ############################################
      provider_val = slim.dataset_data_provider.DatasetDataProvider(
          dataset_val,
          shuffle=True,
          common_queue_capacity=2 * FLAGS.batch_size_val,
          common_queue_min=FLAGS.batch_size_val)
      [image_val, label_val] = provider_val.get(['image', 'label'])
      label_val -= FLAGS.labels_offset
      
      eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size
  
      image_val = image_preprocessing_fn_val(image_val, eval_image_size, eval_image_size)
  
      images_val, labels_val = tf.train.batch(
          [image_val, label_val],
          batch_size=FLAGS.batch_size_val,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size_val)
      labels_val_onehot = slim.one_hot_encoding(
          labels_val, dataset.num_classes - FLAGS.labels_offset)
      
    ###############################
    # Define the model (training) #
    ###############################
    
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      
      with tf.variable_scope('my_scope'):
          logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'], labels,
            label_smoothing=FLAGS.label_smoothing, weights=0.4,
            scope='aux_loss')
        
      tf.losses.softmax_cross_entropy(
          labels, logits, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      # adding in a picture of the activations at each layer, this is a good way to double check that the rotated images look rotated to our eyes
      if 'conv' in end_point:
        dims = x.get_shape()
        for ii in range(5):
          summaries.add(tf.summary.image('image_out/' + end_point + '/image_' + str(ii), tf.slice(x,[ii,0,0,0],[1,dims[1],dims[2],1])))
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    if FLAGS.quantize_delay >= 0:
      tf.contrib.quantize.create_training_graph(
          quant_delay=FLAGS.quantize_delay)
      
    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    #  and returns a train_tensor and summary_op
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))
 
    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    #################################
    # Define the model (validation) #
    #################################
    
    # get the validation set logits (predictions)
    with tf.variable_scope('my_scope',reuse=True):
      logits_val, _ = network_fn_val(images_val)
        
    predictions_val = tf.argmax(logits_val, 1)
    
    # Define loss on validation set, add a summary
    tf.losses.softmax_cross_entropy(
      labels_val_onehot, logits_val, label_smoothing=FLAGS.label_smoothing, 
      weights=1.0, loss_collection = 'eval_losses')
    
    for loss in tf.get_collection('eval_losses'):
      summaries.add(tf.summary.scalar('eval_losses/%s' % loss.op.name, loss))
      
    # Define the validation set metrics: 
    # Will define each metric twice as separate operation. 
    # One set will be made resettable, the other set will be streaming.
    with tf.name_scope('eval_metrics'):
      eval_acc_value, eval_acc_op = tf.metrics.accuracy(predictions=predictions_val,labels=labels_val)    
      eval_recall_5_value, eval_recall_5_op = slim.metrics.streaming_recall_at_k(predictions=logits_val, labels=labels_val,k=5) 
      # add these variables as summaries for tensorboard
      summaries.add(tf.summary.scalar('eval_recall_5', eval_recall_5_value))
      summaries.add(tf.summary.scalar('eval_acc', eval_acc_value))
      
    with tf.name_scope('eval_metrics_streaming'):
      eval_acc_streaming_value, eval_acc_streaming_op = tf.metrics.accuracy(predictions=predictions_val,labels=labels_val) 
      eval_recall_5_streaming_value, eval_recall_5_streaming_op = slim.metrics.streaming_recall_at_k(predictions=logits_val, labels=labels_val,k=5) 
      # add these variables as summaries for tensorboard
      summaries.add(tf.summary.scalar('eval_recall_5_streaming', eval_recall_5_streaming_value))
      summaries.add(tf.summary.scalar('eval_acc_streaming', eval_acc_streaming_value))
    
   # also add summaries of all the local variables used to compute the eval metrics...
    for metric in tf.get_collection(tf.GraphKeys.METRIC_VARIABLES, 'eval_metrics'):
      summaries.add(tf.summary.scalar('%s' % metric.op.name, metric))
    for metric in tf.get_collection(tf.GraphKeys.METRIC_VARIABLES, 'eval_streaming_metrics'):
      summaries.add(tf.summary.scalar('%s' % metric.op.name, metric))

    # gather up all the variables that are used to compute eval metrics
    stream_vars = [i for i in tf.local_variables() if i.name.split('/')[0]=='eval_metrics']
    # make an operation that'll let us re-initialize just these vars.
    reset_op = tf.initialize_variables(stream_vars)
   
    # make an operation that'll let us run evaluation (all metrics)
    eval_op = list([eval_acc_op, eval_recall_5_op, eval_acc_streaming_op, eval_recall_5_streaming_op])
    
    # Gather validation summaries
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))
    
    # Merge all summaries together (this includes training summaries too).
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    # Create a non-default saver so we don't delete all the old checkpoints.
    my_saver = tf_saver.Saver(max_to_keep=FLAGS.max_checkpoints_to_keep,
               keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,)
    
    # Create a non-default dictionary of options for train_step_fn
    # This is a hack that lets us pass everything we need to run evaluation, into the training loop function
    with ops.name_scope('train_step'):
        train_step_kwargs = {}

        if FLAGS.max_number_of_steps:
          should_stop_op = math_ops.greater_equal(global_step, FLAGS.max_number_of_steps)
        else:
          should_stop_op = constant_op.constant(False)
        train_step_kwargs['should_stop'] = should_stop_op
        if FLAGS.log_every_n_steps > 0:
          train_step_kwargs['should_log'] = math_ops.equal(
              math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)
        train_step_kwargs['should_val'] = math_ops.equal(
                math_ops.mod(global_step, FLAGS.val_every_n_steps),0)
        train_step_kwargs['should_reset_eval_metrics'] = math_ops.equal(
                math_ops.mod(global_step, tf.to_int64(math_ops.multiply(FLAGS.reset_eval_metrics_every_n_vals, FLAGS.val_every_n_steps))),0)
        train_step_kwargs['eval_op'] = eval_op
        train_step_kwargs['reset_op'] = reset_op

  
    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None,
        saver=my_saver, 
        train_step_fn=learning_biasCNN.train_step_fn,
        train_step_kwargs = train_step_kwargs)
Ejemplo n.º 55
0
 def defaults_two():
   return control_flow_ops.cond(
       math_ops.equal(math_ops.mod(x, 2), 0),
       multiply,
       divide,
       name="cond_mult")
 def _build_filter_range_graph(self, div):
   return dataset_ops.Dataset.range(100).filter(
       lambda x: math_ops.not_equal(math_ops.mod(x, div), 2))
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight,
                      coverage_penalty_weight, constrained_matrix):
    """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `ConstrainedBeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A new beam state.
  """
    static_batch_size = tensor_util.constant_value(batch_size)

    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished
    not_finished = tf.logical_not(previously_finished)

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    #Amanda: Add Constrained
    #logits = tf.multiply(logits, constrained_matrix)
    #logits += constrained_matrix
    ##################################################
    step_log_probs = nn_ops.log_softmax(logits)
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    total_probs = array_ops.expand_dims(
        beam_state.log_probs, 2
    ) + step_log_probs  # For end beam: set to [-inf,-inf,-inf,-inf], else: set to sum of historical log prob

    # Calculate the continuation lengths by adding to all continuing beams.
    vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
    #lengths_to_add = array_ops.one_hot(
    #    indices=array_ops.tile(
    #        array_ops.reshape(end_token, [1, 1]), [batch_size, beam_width]),
    #    depth=vocab_size,
    #    on_value=constant_op.constant(0, dtype=dtypes.int64),
    #    off_value=constant_op.constant(1, dtype=dtypes.int64),
    #    dtype=dtypes.int64)
    lengths_to_add = array_ops.one_hot(indices=array_ops.fill(
        [batch_size, beam_width], end_token),
                                       depth=vocab_size,
                                       on_value=np.int64(0),
                                       off_value=np.int64(1),
                                       dtype=tf.int64)
    add_mask = math_ops.to_int64(not_finished)
    #add_mask = (1 - math_ops.to_int64(previously_finished))
    lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add
    new_prediction_lengths = (lengths_to_add +
                              array_ops.expand_dims(prediction_lengths, 2))

    #Amanda:Copy coverage penalty factor
    accumulated_attention_probs = None
    attention_probs = get_attention_probs(next_cell_state,
                                          coverage_penalty_weight)
    if attention_probs is not None:
        attention_probs *= tf.expand_dims(tf.to_float(not_finished), 2)
        accumulated_attention_probs = (beam_state.accumulated_attention_probs +
                                       attention_probs)

    # Calculate the scores for each beam
    scores = _get_scores(
        log_probs=total_probs,
        sequence_lengths=new_prediction_lengths,
        length_penalty_weight=length_penalty_weight,
        coverage_penalty_weight=coverage_penalty_weight,
        finished=previously_finished,
        accumulated_attention_probs=accumulated_attention_probs,
        constrained_matrix=constrained_matrix)

    time = ops.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    scores_shape = array_ops.shape(scores)
    scores_flat = control_flow_ops.cond(
        time > 0, lambda: array_ops.reshape(scores, [batch_size, -1]),
        lambda: scores[:, 0])
    num_available_beam = control_flow_ops.cond(
        time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]),
        lambda: math_ops.reduce_prod(scores_shape[2:]))

    # Pick the next beams according to the specified successors function
    next_beam_size = math_ops.minimum(
        ops.convert_to_tensor(beam_width,
                              dtype=dtypes.int32,
                              name="beam_width"), num_available_beam)
    next_beam_scores, word_indices = nn_ops.top_k(scores_flat,
                                                  k=next_beam_size)

    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen predictions
    next_beam_probs = _tensor_gather_helper(gather_indices=word_indices,
                                            gather_from=total_probs,
                                            batch_size=batch_size,
                                            range_size=beam_width * vocab_size,
                                            gather_shape=[-1],
                                            name="next_beam_probs")
    # Note: just doing the following
    #   math_ops.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results of
    # the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = math_ops.mod(word_indices,
                                     vocab_size,
                                     name="next_beam_word_ids")
    next_word_ids = math_ops.to_int32(raw_next_word_ids)
    next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                      name="next_beam_parent_ids")

    # Append new ids to current predictions
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = math_ops.logical_or(previously_finished,
                                        math_ops.equal(next_word_ids,
                                                       end_token),
                                        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged
    # 2. Beams that are now finished (EOS predicted) remain unchanged
    # 3. Beams that are not yet finished have their length increased by 1
    lengths_to_add = math_ops.to_int64(
        math_ops.not_equal(next_word_ids, end_token))
    lengths_to_add = (1 - math_ops.to_int64(next_finished)) * lengths_to_add
    next_prediction_len = _tensor_gather_helper(gather_indices=next_beam_ids,
                                                gather_from=beam_state.lengths,
                                                batch_size=batch_size,
                                                range_size=beam_width,
                                                gather_shape=[-1])
    next_prediction_len += lengths_to_add

    next_accumulated_attention_probs = ()
    if accumulated_attention_probs is not None:
        next_accumulated_attention_probs = _tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=accumulated_attention_probs,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1],
            name="next_accumulated_attention_probs")


# Pick out the cell_states according to the next_beam_ids. We use a
# different gather_shape here because the cell_state tensors, i.e.
# the tensors that would be gathered from, all have dimension
# greater than two and we need to preserve those dimensions.
# pylint: disable=g-long-lambda
    next_cell_state = nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]), next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = ConstrainedBeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished,
        accumulated_attention_probs=next_accumulated_attention_probs)

    output = ConstrainedBeamSearchDecoderOutput(
        #scores=next_beam_scores,
        scores=next_beam_probs,
        #scores = constrained_matrix,
        predicted_ids=next_word_ids,
        parent_ids=next_beam_ids)

    return output, next_state
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to variables.
    This contains most of the synchronization implementation.

    Args:
      grads_and_vars: List of (local_vars, gradients) pairs.
      global_step: Variable to increment by one after the variables have been
      updated. We need it to check staleness.
      name: Optional name for the returned operation. Default to the
        name passed to the Optimizer constructor.

    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and apply averages to local vars or an op to update vars locally.

    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """
        if not grads_and_vars:
            raise ValueError("Must supply at least one variable")
        if global_step is None:
            raise ValueError("Global step is required")

        # Generate copy of all trainable variables
        self._generate_shared_variables()

        # Wraps the apply_gradients op of the parent optimizer
        apply_updates = self._opt.apply_gradients(grads_and_vars, global_step)

        # This function will be called whenever the global_step divides interval steps
        def _apply_averages():  # pylint: disable=missing-docstring
            # Collect local and global vars
            local_vars = [v for g, v in grads_and_vars if g is not None]
            global_vars = ops.get_collection_ref("global_model")
            # sync queue, place it in the ps
            with ops.colocate_with(self._global_step):
                sync_queue = data_flow_ops.FIFOQueue(-1, [dtypes.bool],
                                                     shapes=[[]],
                                                     shared_name="sync_queue")
            train_ops = []
            aggregated_vars = []
            with ops.name_scope(None, self._name + "/global"):
                for var, gvar in zip(local_vars, global_vars):
                    # pylint: disable=protected-access
                    # Get reference to the tensor, this works with Variable and ResourceVariable
                    var = ops.convert_to_tensor(var)
                    # Place the accumulator in the same ps as the corresponding global_var
                    with ops.device(gvar.device):
                        var_accum = data_flow_ops.ConditionalAccumulator(
                            var.dtype,
                            shape=var.get_shape(),
                            shared_name=gvar.name + "/var_accum")
                        # Add op to push local_var to accumulator
                        train_ops.append(
                            var_accum.apply_grad(var, local_step=global_step))
                        # Op to average the vars in the accumulator
                        aggregated_vars.append(
                            var_accum.take_grad(self._replicas_to_aggregate))
                        # Remember accumulator and corresponding device
                        self._accumulator_list.append((var_accum, gvar.device))
            # chief worker updates global vars and enqueues tokens to the sync queue
            if self._is_chief:
                update_ops = []
                # Make sure train_ops are run
                with ops.control_dependencies(train_ops):
                    # Update global_vars with average values
                    for avg_var, gvar in zip(aggregated_vars, global_vars):
                        with ops.device(gvar.device):
                            update_ops.append(state_ops.assign(gvar, avg_var))
                    # Update shared global_step
                    with ops.device(global_step.device):
                        update_ops.append(
                            state_ops.assign_add(self._global_step, 1))
                # After averaging, push tokens to the queue
                with ops.control_dependencies(update_ops), ops.device(
                        global_step.device):
                    tokens = array_ops.fill([self._tokens_per_step],
                                            constant_op.constant(False))
                    sync_op = sync_queue.enqueue_many(tokens)
            # non chief workers deque a token, they will block here until chief is done
            else:
                # Make sure train_ops are run
                with ops.control_dependencies(train_ops), ops.device(
                        global_step.device):
                    sync_op = sync_queue.dequeue()

            # All workers pull averaged values
            with ops.control_dependencies([sync_op]):
                local_update_op = self._assign_vars(local_vars, global_vars)
            return local_update_op

        # Check if we should push and average or not
        with ops.control_dependencies([apply_updates]):
            condition = math_ops.equal(
                math_ops.mod(global_step, self._interval_steps), 0)
            conditional_update = control_flow_ops.cond(condition,
                                                       _apply_averages,
                                                       control_flow_ops.no_op)

        chief_init_ops = []
        # Initialize accumulators, ops placed in ps
        for accum, dev in self._accumulator_list:
            with ops.device(dev):
                chief_init_ops.append(
                    accum.set_global_step(global_step, name="SetGlobalStep"))
        self._chief_init_op = control_flow_ops.group(*(chief_init_ops))

        return conditional_update
Ejemplo n.º 59
0
  def _apply_gradient(self, grad, var, indices=None):
    """The main function to update a variable.

    Args:
      grad: A Tensor containing gradient to apply.
      var: A Tensor containing the variable to update.
      indices: An array of integers, for sparse update.

    Returns:
      Updated variable var = var - learning_rate * preconditioner * grad

    If the gradient is dense, var and grad have the same shape.
    If the update is sparse, then the first dimension of the gradient and var
    may differ, others are all the same. In this case the indices array
    provides the set of indices of the variable which are to be updated with
    each row of the gradient.
    """
    global_step = self._global_step + 1

    # Update accumulated weighted average of gradients
    gbar = self.get_slot(var, "gbar")
    gbar_decay_t = GetParam(self._gbar_decay, global_step)
    gbar_weight_t = GetParam(self._gbar_weight, global_step)
    if indices is not None:
      # Note - the sparse update is not easily implemented, since the
      # algorithm needs all indices of gbar to be updated
      # if mat_gbar_decay != 1 or mat_gbar_decay != 0.
      # One way to make mat_gbar_decay = 1 is by rescaling.
      # If we want the update:
      #         G_{t+1} = a_{t+1} G_t + b_{t+1} w_t
      # define:
      #         r_{t+1} = a_{t+1} * r_t
      #         h_t = G_t / r_t
      # Then:
      #         h_{t+1} = h_t + (b_{t+1} / r_{t+1}) * w_t
      # So we get the mat_gbar_decay = 1 as desired.
      # We can implement this in a future version as needed.
      # However we still need gbar_decay = 0, otherwise all indices
      # of the variable will need to be updated.
      if self._gbar_decay != 0.0:
        tf_logging.warning("Not applying momentum for variable: %s" % var.name)
      gbar_updated = grad
    else:
      gbar_updated = self._weighted_average(gbar, self._gbar_decay,
                                            gbar_decay_t,
                                            gbar_weight_t * grad)

    # Update the preconditioners and compute the preconditioned gradient
    shape = var.get_shape()
    mat_g_list = []
    for i in range(len(shape)):
      mat_g_list.append(self.get_slot(var, "Gbar_" + str(i)))
    mat_gbar_decay_t = GetParam(self._mat_gbar_decay, global_step)
    mat_gbar_weight_t = GetParam(self._mat_gbar_weight, global_step)

    preconditioned_grad = gbar_updated
    v_rank = len(mat_g_list)
    neg_alpha = - GetParam(self._alpha, global_step) / v_rank
    svd_interval = GetParam(self._svd_interval, global_step)
    precond_update_interval = GetParam(self._precond_update_interval,
                                       global_step)
    for i, mat_g in enumerate(mat_g_list):
      # axes is the list of indices to reduce - everything but the current i.
      axes = list(range(i)) + list(range(i+1, v_rank))
      if shape[i] <= self._max_matrix_size:
        # If the tensor size is sufficiently small perform full Shampoo update
        # Note if precond_update_interval > 1 and mat_gbar_decay_t != 1, this
        # is not strictly correct. However we will use it for now, and
        # fix if needed. (G_1 = aG + bg ==> G_n = a^n G + (1+a+..+a^{n-1})bg)

        # pylint: disable=g-long-lambda,cell-var-from-loop
        mat_g_updated = control_flow_ops.cond(
            math_ops.mod(global_step, precond_update_interval) < 1,
            lambda: self._update_mat_g(
                mat_g, grad, axes, mat_gbar_decay_t,
                mat_gbar_weight_t * precond_update_interval, i),
            lambda: mat_g)

        if self._svd_interval == 1:
          mat_h = self._compute_power(var, mat_g_updated, shape[i], neg_alpha)
        else:
          mat_h = control_flow_ops.cond(
              math_ops.mod(global_step, svd_interval) < 1,
              lambda: self._compute_power(var, mat_g_updated, shape[i],
                                          neg_alpha, "H_" + str(i)),
              lambda: self.get_slot(var, "H_" + str(i)))

        # mat_h is a square matrix of size d_i x d_i
        # preconditioned_grad is a d_i x ... x d_n x d_0 x ... d_{i-1} tensor
        # After contraction with a d_i x d_i tensor
        # it becomes a d_{i+1} x ... x d_n x d_0 x ... d_i tensor
        # (the first dimension is contracted out, and the second dimension of
        # mat_h is appended).  After going through all the indices, it becomes
        # a d_0 x ... x d_n tensor again.
        preconditioned_grad = math_ops.tensordot(preconditioned_grad, mat_h,
                                                 axes=([0], [0]),
                                                 name="precond_" + str(i))
      else:
        # Tensor size is too large -- perform diagonal Shampoo update
        grad_outer = math_ops.reduce_sum(grad * grad, axis=axes)
        if i == 0 and indices is not None:
          assert self._mat_gbar_decay == 1.0
          mat_g_updated = state_ops.scatter_add(mat_g, indices,
                                                mat_gbar_weight_t * grad_outer)
          mat_h = math_ops.pow(
              array_ops.gather(mat_g_updated, indices) + self._epsilon,
              neg_alpha)
        else:
          mat_g_updated = self._weighted_average(mat_g,
                                                 self._mat_gbar_decay,
                                                 mat_gbar_decay_t,
                                                 mat_gbar_weight_t * grad_outer)
          mat_h = math_ops.pow(mat_g_updated + self._epsilon, neg_alpha)

        # Need to do the transpose to ensure that the tensor becomes
        # a d_{i+1} x ... x d_n x d_0 x ... d_i tensor as described above.
        preconditioned_grad = array_ops.transpose(
            preconditioned_grad, perm=list(range(1, v_rank)) + [0]) * mat_h

    # Update the variable based on the Shampoo update
    learning_rate_t = GetParam(self._learning_rate, global_step)
    if indices is not None:
      var_updated = state_ops.scatter_add(
          var, indices, -learning_rate_t * preconditioned_grad)
    else:
      var_updated = state_ops.assign_sub(var,
                                         learning_rate_t * preconditioned_grad)
    return var_updated
Ejemplo n.º 60
0
def batch_execute(global_step, thunks, batch_size, name=None):
    """Executes a subset of ops per global step.

  Given a list of thunks, each of which produces a single stateful op,
  ensures that exactly 'batch_size' ops are run per global step. Ops are
  scheduled in a round-robin fashion. For example, with 3 ops

    global_step | op0 | op1 | op2
    ------------+-----+-----+-----
        0       |  x  |  x  |
    ------------+-----+-----+-----
        1       |  x  |     |  x
    ------------+-----+-----+-----
        2       |     |  x  |  x
    ------------+-----+-----+-----
        3       |  x  |  x  |
    ------------+-----+-----+-----
        4       |  x  |     |  x

  Does not guarantee order of op execution within a single global step.

  Args:
    global_step: Tensor indicating time. Determines which ops run.
    thunks: List of thunks. Each thunk encapsulates one op. Return values are
      ignored.
    batch_size: int. Number of ops to execute per global_step.
    name: string or None. Name scope for newly added ops.

  Returns:
    List of ops. Exactly 'batch_size' ops are guaranteed to have an effect
    every global step.
  """
    def true_fn(thunk):
        """Ensures thunk is executed and returns an Op (not a Tensor)."""
        def result():
            with ops.control_dependencies([thunk()]):
                return control_flow_ops.no_op()

        return result

    def false_fn(_):
        """Executes a no-op."""
        def result():
            return control_flow_ops.no_op()

        return result

    with ops.name_scope(name, "batch_execute"):
        true_fns = [true_fn(thunk) for thunk in thunks]
        false_fns = [false_fn(thunk) for thunk in thunks]
        num_thunks = len(thunks)
        conditions = [
            math_ops.less(
                math_ops.mod(batch_size - 1 + global_step * batch_size - j,
                             num_thunks), batch_size)
            for j in range(num_thunks)
        ]
        result = [
            control_flow_ops.cond(condition, true_fn, false_fn)
            for (condition, true_fn,
                 false_fn) in zip(conditions, true_fns, false_fns)
        ]
        return result