def rot90(image, k=1, name=None):
  """Rotate an image counter-clockwise by 90 degrees.

  Args:
    image: A 3-D tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by
      90 degrees.
    name: A name for this operation (optional).

  Returns:
    A rotated 3-D tensor of the same type and shape as `image`.
  """
  with ops.name_scope(name, 'rot90', [image, k]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    _Check3DImage(image, require_static=False)
    k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
    k.get_shape().assert_has_rank(0)
    # Only k mod 4 distinct rotations exist.
    k = math_ops.mod(k, 4)

    def _quarter_turn():
      # Reverse columns, then swap the two spatial axes: one CCW quarter turn.
      return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2])

    def _half_turn():
      return array_ops.reverse_v2(image, [0, 1])

    def _three_quarter_turn():
      return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1])

    branches = [(math_ops.equal(k, 1), _quarter_turn),
                (math_ops.equal(k, 2), _half_turn),
                (math_ops.equal(k, 3), _three_quarter_turn)]
    rotated = control_flow_ops.case(
        branches, default=lambda: image, exclusive=True, name=scope)
    # Height/width are unknown statically (they may swap); channels survive.
    rotated.set_shape([None, None, image.get_shape()[2]])
    return rotated
def pack_uint8_r2_to_uint32(self, test_input):
  """Packs each group of 4 uint8 columns into one big-endian uint32 column.

  The input is zero-padded on the right so the column count becomes a
  multiple of 4, then every 4 consecutive bytes are combined into a single
  uint32 (first byte is the most significant).

  Args:
    test_input: A rank-2 tensor with a statically known shape.

  Returns:
    A `[num_rows, ceil(num_columns / 4)]` uint32 tensor.
  """
  rows, cols = test_input.get_shape().as_list()
  packed_cols = int(math.ceil(cols / 4.0))
  padded = array_ops.pad(
      math_ops.cast(test_input, dtype=dtypes.uint8),
      constant_op.constant([[0, 0], [0, packed_cols * 4 - cols]]))
  packed = array_ops.zeros([rows, packed_cols], dtype=dtypes.uint32)
  lanes = 4
  bits_per_lane = 8
  column_ids = math_ops.range(packed_cols * lanes)
  for lane in range(lanes):
    # Select every 4th column starting at offset `lane`.
    lane_mask = math_ops.equal(math_ops.mod(column_ids, lanes), lane)
    lane_columns = array_ops.boolean_mask(column_ids, lane_mask)
    lane_bytes = array_ops.gather(padded, lane_columns, axis=1)
    # Earlier lanes occupy higher-order bytes of the packed word.
    shift = bits_per_lane * (lanes - lane - 1)
    shifted = bitwise_ops.left_shift(
        math_ops.cast(lane_bytes, dtype=dtypes.uint32), shift)
    packed = bitwise_ops.bitwise_or(packed, shifted)
  return packed
def insert_transformed_feature(self, columns_to_tensors):
  """Handles sparse column to id conversion.

  Reads the source sparse tensor registered under `self.name`, hashes its
  values into `self.bucket_size` buckets via modulo, and stores the result
  under `self` with the original sparsity pattern.
  """
  source = columns_to_tensors[self.name]
  bucketed_values = math_ops.mod(source.values, self.bucket_size)
  columns_to_tensors[self] = ops.SparseTensor(
      source.indices, bucketed_values, source.shape)
def gcd(a, b, name=None):
  """Returns the greatest common divisor via Euclid's algorithm.

  Args:
    a: The dividend. A scalar integer `Tensor`.
    b: The divisor. A scalar integer `Tensor`.
    name: An optional name for the operation.

  Returns:
    A scalar `Tensor` representing the greatest common divisor between `a`
    and `b`.

  Raises:
    ValueError: If `a` or `b` are not scalar integers.
  """
  with ops.name_scope(name, 'gcd', [a, b]):
    a = ops.convert_to_tensor(a)
    b = ops.convert_to_tensor(b)
    a.shape.assert_has_rank(0)
    b.shape.assert_has_rank(0)
    if not a.dtype.is_integer:
      raise ValueError('a must be an integer type. Got: %s' % a.dtype)
    if not b.dtype.is_integer:
      raise ValueError('b must be an integer type. Got: %s' % b.dtype)

    def _keep_going(unused_dividend, divisor):
      # Iterate until the running remainder reaches zero.
      return math_ops.greater(divisor, array_ops.zeros_like(divisor))

    def _euclid_step(dividend, divisor):
      return [divisor, math_ops.mod(dividend, divisor)]

    a, b = control_flow_ops.while_loop(
        _keep_going, _euclid_step, [a, b], back_prop=False)
    return a
def testFilteredElementsStats(self, dataset_transformation):
  """Checks dropped/filtered element counters reported by the Filter node.

  range(101) filtered to multiples of 3 yields 34 elements; for each kept
  element after the first, two preceding elements were dropped.
  """
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(101).filter(
      lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
  dataset = dataset_transformation(dataset, aggregator)
  iterator = dataset_ops.make_initializable_iterator(dataset)
  next_element = iterator.get_next()
  summary_t = aggregator.get_summary()
  with self.test_session() as sess:
    self.evaluate(iterator.initializer)
    for i in range(34):
      self.assertEqual(i * 3, self.evaluate(next_element))
      # Bug fix: `i is not 0` compared identity, not value; int-literal
      # identity is a CPython small-int-cache implementation detail.
      if i != 0:
        self._assertSummaryHasScalarValue(
            self.evaluate(summary_t), "Filter::dropped_elements",
            float(i * 2))
      self._assertSummaryHasScalarValue(
          self.evaluate(summary_t), "Filter::filtered_elements",
          float(i + 1))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element)
    self._assertSummaryHasScalarValue(
        self.evaluate(summary_t), "Filter::dropped_elements", 67.0)
    self._assertSummaryHasScalarValue(
        self.evaluate(summary_t), "Filter::filtered_elements", 34.0)
def testFilteredElementsStats(self):
  """Checks dropped/filtered element statistics of FilterDataset."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(101).filter(
      lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
  dataset = self.datasetExperimentalStats(dataset, aggregator)
  next_element = self.getNext(dataset, requires_initialization=True)
  for step in range(34):
    # Only multiples of 3 survive: 0, 3, 6, ...
    self.assertEqual(step * 3, self.evaluate(next_element()))
    handle = self.getHandle(aggregator)
    if step != 0:
      self.assertStatisticsHasScalarValue(
          handle,
          self.regexForNodeName("FilterDataset", "dropped_elements"),
          float(step * 2))
    self.assertStatisticsHasScalarValue(
        handle,
        self.regexForNodeName("FilterDataset", "filtered_elements"),
        float(step + 1))
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(next_element())
  handle = self.getHandle(aggregator)
  self.assertStatisticsHasScalarValue(
      handle, self.regexForNodeName("FilterDataset", "dropped_elements"),
      67.0)
  self.assertStatisticsHasScalarValue(
      handle, self.regexForNodeName("FilterDataset", "filtered_elements"),
      34.0)
def testFilteredElementsStats(self, dataset_transformation):
  """Checks dropped/filtered element counters in FilterDataset summaries.

  range(101) filtered to multiples of 3 yields 34 elements; for each kept
  element after the first, two preceding elements were dropped.
  """
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(101).filter(
      lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
  dataset = dataset_transformation(dataset, aggregator)
  next_element = self.getNext(dataset, requires_initialization=True)
  for i in range(34):
    self.assertEqual(i * 3, self.evaluate(next_element()))
    summary_str = self.evaluate(aggregator.get_summary())
    # Bug fix: `i is not 0` relied on CPython small-int identity caching;
    # value comparison `!=` is the correct test.
    if i != 0:
      self._assertSummaryHasScalarValue(
          summary_str,
          self.regexForNodeName("FilterDataset", "dropped_elements"),
          float(i * 2))
    self._assertSummaryHasScalarValue(
        summary_str,
        self.regexForNodeName("FilterDataset", "filtered_elements"),
        float(i + 1))
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(next_element())
  summary_str = self.evaluate(aggregator.get_summary())
  self._assertSummaryHasScalarValue(
      summary_str, self.regexForNodeName("FilterDataset", "dropped_elements"),
      67.0)
  self._assertSummaryHasScalarValue(
      summary_str,
      self.regexForNodeName("FilterDataset", "filtered_elements"), 34.0)
def _TileGrad(op, grad):
  """Sum reduces grad along the tiled dimensions."""
  input_shape = array_ops.shape(op.inputs[0])
  # We interleave multiples and input_shape to get split_shape,
  # reshape grad to split_shape, and reduce along all even
  # dimensions (the tiled dimensions) to get the result
  # with shape input_shape.  For example
  #   input_shape = [20, 30, 40]
  #   multiples = [2, 3, 4]
  #   split_shape = [2, 20, 3, 30, 4, 40]
  #   axes = [0, 2, 4]
  split_shape = array_ops.reshape(
      array_ops.transpose(array_ops.stack([op.inputs[1], input_shape])), [-1])
  axes = math_ops.range(0, array_ops.size(split_shape), 2)
  # Sum reduces grad along the first dimension for IndexedSlices
  if isinstance(grad, ops.IndexedSlices):
    # The modulo folds indices of the tiled output back into the original
    # row range so that repeated rows accumulate into the same segment;
    # the leading split dim then collapses to 1.
    grad = math_ops.unsorted_segment_sum(
        grad.values,
        math_ops.mod(grad.indices, input_shape[0]),
        input_shape[0])
    split_shape = array_ops.concat([[1], split_shape[1:]], axis=0)
  input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes)
  # Fix shape inference
  if not context.executing_eagerly():
    input_grad.set_shape(op.inputs[0].get_shape())
  # No gradient flows to the `multiples` input.
  return [input_grad, None]
def _shard_indices(self, keys):
  """Returns the shard index owning each key, as an int32 vector."""
  key_shape = keys.get_shape()
  if key_shape.ndims > 1:
    # If keys are a matrix (i.e. a single key is a vector), we use the first
    # element of each key vector to determine the shard.
    keys = array_ops.slice(keys, [0, 0], [key_shape[0].value, 1])
    keys = array_ops.reshape(keys, [-1])
  shard_ids = math_ops.mod(math_ops.abs(keys), self._num_shards)
  return math_ops.cast(shard_ids, dtypes.int32)
def testFilterRange(self):
  """Filter keeps x from range(100) where x % 3 != 2."""
  dataset = dataset_ops.Dataset.range(100).filter(
      lambda x: math_ops.not_equal(math_ops.mod(x, 3), 2))
  get_next = dataset.make_one_shot_iterator().get_next()
  with self.test_session() as sess:
    # The first three surviving values.
    for expected in (0, 1, 3):
      self.assertEqual(expected, sess.run(get_next))
def testFixed(self):
  """Integer mod agrees with np.mod for scalar and vector divisors."""
  values = [5, 10, 23]
  for dtype in [np.int32, np.int64]:
    # Test scalar and vector versions.
    for divisor in [values[0], values]:
      x_np = np.array(values, dtype=dtype)
      with self.test_session(use_gpu=True):
        x_tf = constant_op.constant(x_np, shape=x_np.shape)
        tf_result = math_ops.mod(x_tf, divisor).eval()
        self.assertAllClose(tf_result, np.mod(x_np, divisor))
def testFloat(self):
  """Float mod agrees with np.fmod for scalar and vector divisors."""
  values = [0.5, 0.7, 0.3]
  for dtype in [np.float32, np.double]:
    # Test scalar and vector versions.
    for divisor in [values[0], [values[0]] * 3]:
      x_np = np.array(values, dtype=dtype)
      with self.test_session(use_gpu=True):
        x_tf = constant_op.constant(x_np, shape=x_np.shape)
        tf_result = math_ops.mod(x_tf, divisor).eval()
        self.assertAllClose(tf_result, np.fmod(x_np, divisor), atol=1e-2)
def _add_sinusoids_signal(x, time, min_timescale=1.0, max_timescale=1.0e4):
  """Adds a bunch of sinusoids of different frequencies to a Tensor.

  Each channel of the input Tensor is incremented by a sinusoid of a
  different frequency and phase.

  This allows attention to learn to use absolute and relative positions.
  Timing signals should be added to some precursors of both the query and
  the memory inputs to attention.

  The use of relative position is possible because sin(x+y) and cos(x+y) can
  be expressed in terms of y, sin(x) and cos(x).

  In particular, we use a geometric sequence of timescales starting with
  min_timescale and ending with max_timescale.  The number of different
  timescales is equal to channels / 2. For each timescale, we generate the
  two sinusoidal signals sin(timestep/timescale) and cos(timestep/timescale).
  All of these sinusoids are concatenated in the channels dimension.

  Args:
    x: a Tensor with shape [batch, length, channels] (or [batch, channels]
      for a single decoding step).
    time: scalar timestep used as the position when `x` has rank 2.
    min_timescale: a float
    max_timescale: a float

  Returns:
    a Tensor the same shape as x.
  """
  channels = x.get_shape().as_list()[-1]
  if x.get_shape().ndims == 3:  # [batch_size, timesteps, dim]
    length = array_ops.shape(x)[1]
    position = math_ops.to_float(math_ops.range(length))
  elif x.get_shape().ndims == 2:  # [batch_size, dim]
    # Single step: the position is the externally supplied `time`.
    length = 1
    position = math_ops.to_float(math_ops.range(time, time + 1))
  else:
    raise ValueError("need a Tensor with rank 2 or 3")
  num_timescales = channels // 2
  # Geometric progression of timescales from min_timescale to max_timescale.
  log_timescale_increment = (
      math.log(float(max_timescale) / float(min_timescale)) /
      (math_ops.to_float(num_timescales) - 1))
  inv_timescales = min_timescale * math_ops.exp(
      math_ops.to_float(math_ops.range(num_timescales)) *
      -log_timescale_increment)
  scaled_time = array_ops.expand_dims(position, 1) * array_ops.expand_dims(
      inv_timescales, 0)
  signal = array_ops.concat(
      [math_ops.sin(scaled_time), math_ops.cos(scaled_time)], axis=1)
  # Pad one zero channel when `channels` is odd so shapes line up.
  signal = array_ops.pad(signal, [[0, 0], [0, math_ops.mod(channels, 2)]])
  if x.get_shape().ndims == 3:
    signal = array_ops.reshape(signal, [1, length, channels])
  else:
    signal = array_ops.reshape(signal, [1, channels])
  return x + signal
def adjust_hue(image, delta, name=None):
  """Adjust hue of an RGB image.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, add an offset to the hue channel,
  converts back to RGB and then back to the original data type. If several
  adjustments are chained it is advisable to minimize the number of redundant
  conversions.

  `image` is an RGB image.  The image hue is adjusted by converting the
  image to HSV and rotating the hue channel (H) by `delta`.  The image is
  then converted back to RGB.

  `delta` must be in the interval `[-1, 1]`.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    delta: float.  How much to add to the hue channel.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_hue', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Remember original dtype so we can convert back if needed
    orig_dtype = image.dtype
    flt_image = convert_image_dtype(image, dtypes.float32)

    # TODO(zhengxq): we will switch to the fused version after we add a GPU
    # kernel for that.
    fused = os.environ.get('TF_ADJUST_HUE_FUSED', '')
    fused = fused.lower() in ('true', 't', '1')

    if not fused:
      hsv = gen_image_ops.rgb_to_hsv(flt_image)

      hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
      saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
      value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])

      # Hue is normalized to [0, 1], so adding 1 is one full turn; the +1
      # keeps the operand of mod non-negative for delta in [-1, 1] without
      # changing the wrapped result.
      hue = math_ops.mod(hue + (delta + 1.), 1.)

      hsv_altered = array_ops.concat_v2([hue, saturation, value], 2)
      rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)
    else:
      rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)

    return convert_image_dtype(rgb_altered, orig_dtype)
def do_test(count, modulus):
  """Squares components, repeats `count` times, keeps rows with x % modulus == 0."""
  dataset = dataset_ops.Dataset.from_tensor_slices(components).map(
      _map_fn).repeat(count).filter(
          lambda x, _y, _z: math_ops.equal(math_ops.mod(x, modulus), 0))
  self.assertEqual([c.shape[1:] for c in components],
                   list(dataset.output_shapes))
  get_next = self.getNext(dataset)
  surviving = [v for v in range(7) if v * v % modulus == 0]
  for _ in range(count):
    for i in surviving:
      result = self.evaluate(get_next())
      for component, result_component in zip(components, result):
        self.assertAllEqual(component[i]**2, result_component)
  # The iterator must be exhausted afterwards.
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
def restart_decay_fn(global_step):
  """Cosine decay restarted every decay_steps / num_periods steps.

  NOTE(review): `num_periods`, `decay_steps` and `zero_after` are closure
  variables from the enclosing scope.
  """
  if global_step is None:
    raise ValueError("global_step is required for cosine_decay.")
  global_step = math_ops.minimum(global_step, decay_steps)
  # Position within the current cosine cycle.
  cycle_position = math_ops.mod(
      num_periods * math_ops.to_float(global_step), decay_steps)
  completed_fraction = cycle_position / math_ops.to_float(decay_steps)
  decayed = 0.5 * (
      1.0 + math_ops.cos(constant_op.constant(math.pi) * completed_fraction))
  if zero_after is not None:
    # Force the decay to zero once `zero_after` full periods have elapsed.
    periods_elapsed = math_ops.to_float(
        num_periods * global_step) / math_ops.to_float(decay_steps)
    decayed = array_ops.where(
        math_ops.greater_equal(periods_elapsed, zero_after), 0.0, decayed)
  return decayed
def _raised_cosine_window(name, default_name, window_length, periodic,
                          dtype, a, b):
  """Helper function for computing a raised cosine window.

  Args:
    name: Name to use for the scope.
    default_name: Default name to use for the scope.
    window_length: A scalar `Tensor` or integer indicating the window length.
    periodic: A bool `Tensor` indicating whether to generate a periodic or
      symmetric window.
    dtype: A floating point `DType`.
    a: The alpha parameter to the raised cosine window.
    b: The beta parameter to the raised cosine window.

  Returns:
    A `Tensor` of shape `[window_length]` of type `dtype`.

  Raises:
    ValueError: If `dtype` is not a floating point type or `window_length` is
      not scalar or `periodic` is not scalar.
  """
  if not dtype.is_floating:
    raise ValueError('dtype must be a floating point type. Found %s' % dtype)
  with ops.name_scope(name, default_name, [window_length, periodic]):
    window_length = ops.convert_to_tensor(window_length, dtype=dtypes.int32,
                                          name='window_length')
    window_length.shape.assert_has_rank(0)
    window_length_const = tensor_util.constant_value(window_length)
    # Statically-known length-1 windows short-circuit to all-ones (the
    # general formula would divide by n == 0 below).
    if window_length_const == 1:
      return array_ops.ones([1], dtype=dtype)
    # Cast periodic to int32 (0 or 1) so it can participate in arithmetic.
    periodic = math_ops.cast(
        ops.convert_to_tensor(periodic, dtype=dtypes.bool, name='periodic'),
        dtypes.int32)
    periodic.shape.assert_has_rank(0)
    # even is 1 for even window lengths, 0 for odd lengths.
    even = 1 - math_ops.mod(window_length, 2)
    # Periodic windows of even length use a denominator one larger than
    # symmetric windows.
    n = math_ops.cast(window_length + periodic * even - 1, dtype=dtype)
    count = math_ops.cast(math_ops.range(window_length), dtype)
    cos_arg = constant_op.constant(2 * np.pi, dtype=dtype) * count / n
    if window_length_const is not None:
      return math_ops.cast(a - b * math_ops.cos(cos_arg), dtype=dtype)
    # Dynamic window_length: guard the length-1 case at graph run time.
    return control_flow_ops.cond(
        math_ops.equal(window_length, 1),
        lambda: array_ops.ones([1], dtype=dtype),
        lambda: math_ops.cast(a - b * math_ops.cos(cos_arg), dtype=dtype))
def _update_damping(self, prev_batch, global_step):
  """Adapts damping parameter. Check KFAC (Section 6.5) for the details.

  The damping parameter is updated according to the Levenberg-Marquardt rule
  every `self._damping_adaptation_interval` iterations.

  Args:
    prev_batch: Tensor or tuple of tensors which can be passed to
      `self._loss_fn` to evaluate loss.
    global_step: `Variable` which keeps track of number of times the training
      variables have been updated.

  Returns:
    A `tf.cond` op which updates the damping parameter.
  """

  def compute_damping():
    """Adapts damping parameter based on "reduction ratio".

    Reduction ratio captures how closely the quadratic approximation to the
    loss function approximates the actual loss within a trust region. The
    damping update tries to make the damping as small as possible while
    maintaining the property that the quadratic model remains a good local
    approximation to the loss function.

    Returns:
      An Op to assign newly computed damping value to `self._damping`.
    """
    prev_batch_loss = self._loss_fn(prev_batch)
    with ops.control_dependencies([prev_batch_loss]):
      # rho = (actual loss change) / (change predicted by the quadratic
      # model).
      rho_assign = self._rho.assign(
          (prev_batch_loss - self._prev_loss) / self._q_model_change)
      with ops.control_dependencies([rho_assign]):
        # Rescale damping by self._omega when rho falls outside [0.25, 0.75];
        # inside that band the damping is left unchanged.
        new_damping = control_flow_ops.case(
            [(self._rho < 0.25, lambda: self.damping / self._omega),
             (self._rho > 0.75, lambda: self.damping * self._omega)],
            lambda: self.damping)
        with ops.control_dependencies([new_damping]):
          # Never let the damping drop below the configured floor.
          new_damping_min = math_ops.maximum(new_damping, self._min_damping)
          return control_flow_ops.group(self._damping.assign(new_damping_min))

  # Only adapt every `self._damping_adaptation_interval` steps.
  return control_flow_ops.cond(
      math_ops.equal(
          math_ops.mod(global_step + 1, self._damping_adaptation_interval),
          0), compute_damping, control_flow_ops.no_op)
def do_test(count, modulus):  # pylint: disable=missing-docstring
  dataset = dataset_ops.Dataset.from_tensor_slices(components).map(
      _map_fn).repeat(count)
  # pylint: disable=g-long-lambda
  dataset = self.apply_filter(
      dataset, lambda x, _y, _z: math_ops.equal(
          math_ops.mod(x, modulus), 0))
  # pylint: enable=g-long-lambda
  self.assertEqual(
      [c.shape[1:] for c in components],
      list(dataset_ops.get_legacy_output_shapes(dataset)))
  get_next = self.getNext(dataset)
  surviving = [v for v in range(7) if v * v % modulus == 0]
  for _ in range(count):
    for i in surviving:
      result = self.evaluate(get_next())
      for component, result_component in zip(components, result):
        self.assertAllEqual(component[i]**2, result_component)
  # The iterator must be exhausted afterwards.
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
def testFilterDataset(self):
  """End-to-end Dataset.filter test with placeholder-fed count and modulus."""
  components = (
      np.arange(7, dtype=np.int64),
      np.array([[1, 2, 3]], dtype=np.int64) * np.arange(
          7, dtype=np.int64)[:, np.newaxis],
      np.array(37.0, dtype=np.float64) * np.arange(7)
  )
  count = array_ops.placeholder(dtypes.int64, shape=[])
  modulus = array_ops.placeholder(dtypes.int64)

  def _map_fn(x, y, z):
    return math_ops.square(x), math_ops.square(y), math_ops.square(z)

  iterator = (
      dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn)
      .repeat(count)
      .filter(lambda x, _y, _z: math_ops.equal(math_ops.mod(x, modulus), 0))
      .make_initializable_iterator())
  init_op = iterator.initializer
  get_next = iterator.get_next()

  # Mapping/filtering must preserve the per-element component shapes.
  self.assertEqual([c.shape[1:] for c in components],
                   [t.shape for t in get_next])

  with self.test_session() as sess:
    # Test that we can dynamically feed a different modulus value for each
    # iterator.
    def do_test(count_val, modulus_val):
      sess.run(init_op, feed_dict={count: count_val, modulus: modulus_val})
      for _ in range(count_val):
        # Only rows whose squared first component is divisible by the
        # modulus survive the filter.
        for i in [x for x in range(7) if x**2 % modulus_val == 0]:
          result = sess.run(get_next)
          for component, result_component in zip(components, result):
            self.assertAllEqual(component[i]**2, result_component)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)

    do_test(14, 2)
    do_test(4, 18)

    # Test an empty dataset.
    do_test(0, 1)
def adjust_hue(image, delta, name=None):
  """Adjust hue of a batch of RGB images.

  Converts the input to float, rotates the hue channel in HSV space by
  `delta`, and converts back to the original dtype.  The slices use four
  indices, so `image` is expected to be batched NHWC
  (`[batch, height, width, 3]`).

  Args:
    image: RGB image(s); size of the last dimension must be 3.
    delta: float. How much to add to the hue channel.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.op_scope([image], name, 'adjust_hue') as name:
    # Remember original dtype so we can convert back if needed
    orig_dtype = image.dtype
    flt_image = tf.image.convert_image_dtype(image, tf.float32)

    hsv = gen_image_ops.rgb_to_hsv(flt_image)

    hue = tf.slice(hsv, [0, 0, 0, 0], [-1, -1, -1, 1])
    saturation = tf.slice(hsv, [0, 0, 0, 1], [-1, -1, -1, 1])
    value = tf.slice(hsv, [0, 0, 0, 2], [-1, -1, -1, 1])

    # Hue is normalized to [0, 1], so adding 1 is one full turn; the +1
    # keeps the operand of mod non-negative for delta in [-1, 1] without
    # changing the wrapped result.
    hue = math_ops.mod(hue + (delta + 1.), 1.)

    hsv_altered = tf.concat(3, [hue, saturation, value])
    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    return tf.image.convert_image_dtype(rgb_altered, orig_dtype)
def adjust_hue(image, delta, name=None):
  """Adjust hue of an RGB image.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, add an offset to the hue channel,
  converts back to RGB and then back to the original data type. If several
  adjustments are chained it is advisable to minimize the number of redundant
  conversions.

  `image` is an RGB image.  The image hue is adjusted by converting the
  image to HSV and rotating the hue channel (H) by `delta`.  The image is
  then converted back to RGB.

  `delta` must be in the interval `[-1, 1]`.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    delta: float.  How much to add to the hue channel.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.op_scope([image], name, 'adjust_hue') as name:
    # Remember original dtype so we can convert back if needed
    orig_dtype = image.dtype
    flt_image = convert_image_dtype(image, dtypes.float32)

    hsv = gen_image_ops.rgb_to_hsv(flt_image)

    hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
    saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
    value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])

    # Hue is normalized to [0, 1], so adding 1 is one full turn; the +1
    # keeps the operand of mod non-negative for delta in [-1, 1] without
    # changing the wrapped result.
    hue = math_ops.mod(hue + (delta + 1.), 1.)

    hsv_altered = array_ops.concat(2, [hue, saturation, value])
    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    return convert_image_dtype(rgb_altered, orig_dtype)
def _Grad(op, grad): """A gradient function for IRFFT with the provided `rank` and `rfft_fn`.""" # Generate a simple mask like [1.0, 2.0, ..., 2.0, 1.0] for even-length FFTs # and [1.0, 2.0, ..., 2.0] for odd-length FFTs. To reduce extra ops in the # graph we special-case the situation where the FFT length and last # dimension of the input are known at graph construction time. fft_length = op.inputs[1] is_odd = math_ops.mod(fft_length[-1], 2) input_last_dimension = array_ops.shape(op.inputs[0])[-1] mask = array_ops.concat( [[1.0], 2.0 * array_ops.ones([input_last_dimension - 2 + is_odd]), array_ops.ones([1 - is_odd])], 0) rsize = math_ops.reciprocal(math_ops.to_float(_FFTSizeForGrad(grad, rank))) # The gradient of IRFFT is the RFFT of the incoming gradient times a scaling # factor and a mask. The mask scales the gradient for the Hermitian # symmetric components of the RFFT by a factor of two, since these # components are de-duplicated in the RFFT. rfft = rfft_fn(grad, fft_length) return rfft * math_ops.cast(rsize * mask, dtypes.complex64), None
def gcd(a, b, name=None):
  """Returns the greatest common divisor via Euclid's algorithm.

  Args:
    a: The dividend. A scalar integer `Tensor`.
    b: The divisor. A scalar integer `Tensor`.
    name: An optional name for the operation.

  Returns:
    A scalar `Tensor` representing the greatest common divisor between `a`
    and `b`.

  Raises:
    ValueError: If `a` or `b` are not scalar integers.
  """
  # Bug fix: fractions.gcd was deprecated in Python 3.5 and removed in 3.9.
  # math.gcd is the supported equivalent (identical for non-negative ints).
  # Local import keeps this change self-contained.
  import math

  with ops.name_scope(name, 'gcd', [a, b]):
    a = ops.convert_to_tensor(a)
    b = ops.convert_to_tensor(b)

    a.shape.assert_has_rank(0)
    b.shape.assert_has_rank(0)

    if not a.dtype.is_integer:
      raise ValueError('a must be an integer type. Got: %s' % a.dtype)
    if not b.dtype.is_integer:
      raise ValueError('b must be an integer type. Got: %s' % b.dtype)

    # TPU requires static shape inference. GCD is used for subframe size
    # computation, so we should prefer static computation where possible.
    const_a = tensor_util.constant_value(a)
    const_b = tensor_util.constant_value(b)
    if const_a is not None and const_b is not None:
      # Pin the dtype so the static result matches the input tensors' dtype
      # (math.gcd returns a plain Python int).
      return ops.convert_to_tensor(
          math.gcd(int(const_a), int(const_b)), dtype=a.dtype)

    cond = lambda _, b: math_ops.greater(b, array_ops.zeros_like(b))
    body = lambda a, b: [b, math_ops.mod(a, b)]
    a, b = control_flow_ops.while_loop(cond, body, [a, b], back_prop=False)
    return a
def testMultiplePrefetchStats(self):
  """Each prefetch node reports its own buffer capacity/size statistics."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(10).prefetch(2).filter(
      lambda x: math_ops.equal(math_ops.mod(x, 2), 0)).prefetch(1)
  dataset = self.datasetExperimentalStats(dataset, aggregator)
  next_element = self.getNext(dataset, requires_initialization=True)

  for step in range(5):
    # Even values survive the filter.
    self.assertEqual(step * 2, self.evaluate(next_element()))
    handle = self.getHandle(aggregator)
    # TODO(shivaniagarwal): using exact name of prefetch node than the regex,
    # to differentiate between two prefetch. This might break in future, at
    # which point, it would be best to disable this test.
    self.assertStatisticsHasScalarValue(
        handle, "PrefetchDataset/_5::buffer_capacity", 2)
    self.assertStatisticsContains(handle, "PrefetchDataset/_5::buffer_size")
    self.assertStatisticsHasScalarValue(
        handle, "PrefetchDataset/_8::buffer_capacity", 1)
    self.assertStatisticsContains(handle, "PrefetchDataset/_8::buffer_size")
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(next_element())
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to global variables.

  This is the second part of `minimize()`. It returns an `Operation` that
  applies gradients.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    global_step: Optional `Variable` to increment by one after the
      variables have been updated.
    name: Optional name for the returned operation.  Default to the
      name passed to the `Optimizer` constructor.

  Returns:
    An `Operation` that applies the specified gradients. If `global_step`
    was not None, that operation also increments `global_step`.

  Raises:
    TypeError: If `grads_and_vars` is malformed.
    ValueError: If none of the variables have gradients.
  """
  # Apply gradients with the wrapped optimizer first, then bump the local
  # step counter; the global exchange below is gated on that counter.
  apply_updates = self._opt.apply_gradients(grads_and_vars)
  with ops.control_dependencies([apply_updates]):
    local_update = state_ops.assign_add(
        self._local_step, 1, name='local_step_update').op

  # update global variables.
  def _Update_global_variables():
    local_vars = [v for g, v in grads_and_vars if g is not None]
    global_center_vars = [self._global_map[var] for var in local_vars]
    local_center_vars = [self._local_map[var] for var in local_vars]
    local_center_vars_update = []
    # Refresh the local snapshot of the global center variables.
    for lvar, var in zip(local_center_vars, global_center_vars):
      local_center_vars_update.append(lvar.assign(var))
    update_ops = []
    differences = []
    with ops.control_dependencies(local_center_vars_update):
      for v, lv in zip(local_vars, local_center_vars):
        with ops.device(v.device):
          differences.append(math_ops.subtract(v, lv))
      # Move the local variables toward the center ...
      for lvar, diff in zip(local_vars, differences):
        with ops.device(lvar.device):
          update_ops.append(
              state_ops.assign_sub(
                  lvar, math_ops.multiply(self._moving_rate, diff)))
      # ... and the global center variables toward the local variables,
      # both scaled by the same moving rate.
      for var, diff in zip(global_center_vars, differences):
        with ops.device(var.device):
          update_ops.append(
              state_ops.assign_add(
                  var, math_ops.multiply(self._moving_rate, diff)))
      if global_step:
        with ops.colocate_with(global_step):
          update_ops.append(state_ops.assign_add(global_step, 1))
    variable_update = control_flow_ops.group(*(update_ops))
    return variable_update

  # Only exchange with the global copy every `self._period` local steps.
  with ops.control_dependencies([local_update]):
    condition = math_ops.equal(
        math_ops.mod(self._local_step, self._period), 0)
    conditional_update = control_flow_ops.cond(
        condition, _Update_global_variables, control_flow_ops.no_op)
    return conditional_update
def do_training(train_op, init_fn=None, summary_op=None, lr=None):
  """Runs the training loop under a tf.train.Supervisor.

  Builds init/ready ops, stop/log predicates, per-scope restore Savers
  (appended to the module-level `savers` list), then runs `train_step`
  until the supervisor stops or the step limit is reached.

  Args:
    train_op: Op that performs one training step and returns the loss.
    init_fn: Optional callable run after initialization (passed to the
      Supervisor).
    summary_op: Optional summary op (passed to the Supervisor).
    lr: Learning-rate tensor forwarded to `train_step`.
  """
  global savers
  graph = ops.get_default_graph()
  with graph.as_default():
    global_step = variables.get_or_create_global_step()
    saver = tf_saver.Saver(max_to_keep=0)

    with ops.name_scope('init_ops'):
      init_op = tf_variables.global_variables_initializer()
      ready_op = tf_variables.report_uninitialized_variables()
      local_init_op = control_flow_ops.group(
          tf_variables.local_variables_initializer(),
          data_flow_ops.tables_initializer())

    summary_writer = supervisor.Supervisor.USE_DEFAULT

    with ops.name_scope('train_step'):
      train_step_kwargs = {}
      # Bug fix: `if not FLAGS.max_number_of_steps is None` replaced with
      # the idiomatic (and unambiguous) `is not None` comparison.
      if FLAGS.max_number_of_steps is not None:
        should_stop_op = math_ops.greater_equal(global_step,
                                                FLAGS.max_number_of_steps)
      else:
        should_stop_op = constant_op.constant(False)
      train_step_kwargs['should_stop'] = should_stop_op
      if FLAGS.log_every_n_steps > 0:
        train_step_kwargs['should_log'] = math_ops.equal(
            math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)

    # Build one restore Saver per network scope, remapping variable names
    # onto the InceptionV2 checkpoint namespace (Logits excluded).
    prefix = "loc/net"
    lp = len(prefix)
    vdic = {
        "InceptionV2" + v.op.name[lp:]: v
        for v in tf.trainable_variables()
        if v.name.startswith(prefix) and v.name.find("Logits/") < 0
    }
    _saver = tf_saver.Saver(vdic)
    savers.append(_saver)
    # Bug fix: `xrange` is Python 2 only; use `range`.
    for i in range(NUM_STN):
      prefix = "stn%d/net" % i
      lp = len(prefix)
      vdic = {
          "InceptionV2" + v.op.name[lp:]: v
          for v in tf.trainable_variables()
          if v.name.startswith(prefix) and v.name.find("Logits/") < 0
      }
      _saver = tf_saver.Saver(vdic)
      savers.append(_saver)
    prt("savers %d" % len(savers))

    is_chief = True
    logdir = FLAGS.train_dir
    sv = supervisor.Supervisor(graph=graph,
                               is_chief=is_chief,
                               logdir=logdir,
                               init_op=init_op,
                               init_feed_dict=None,
                               local_init_op=local_init_op,
                               ready_for_local_init_op=None,
                               ready_op=ready_op,
                               summary_op=summary_op,
                               summary_writer=summary_writer,
                               global_step=global_step,
                               saver=saver,
                               save_summaries_secs=FLAGS.save_summaries_secs,
                               save_model_secs=FLAGS.save_interval_secs,
                               init_fn=init_fn)
    if summary_writer is not None:
      train_step_kwargs['summary_writer'] = sv.summary_writer

    with sv.managed_session('', start_standard_services=False,
                            config=None) as sess:
      logging.info('Starting Session.')
      if is_chief:
        if logdir:
          sv.start_standard_services(sess)
      elif startup_delay_steps > 0:
        # NOTE(review): `startup_delay_steps` / `number_of_steps` are not
        # defined in this function — presumably module-level or FLAGS values;
        # verify against the full file.
        # Bug fix: `sys.maxint` does not exist in Python 3; use sys.maxsize.
        _wait_for_step(sess, global_step,
                       min(startup_delay_steps,
                           number_of_steps or sys.maxsize))
      sv.start_queue_runners(sess)
      logging.info('Starting Queues.')
      try:
        while not sv.should_stop():
          total_loss, global_step_value, should_stop = train_step(
              sess, train_op, global_step, lr, train_step_kwargs)
          if (global_step_value > 0 and
              global_step_value % FLAGS.save_every_n_steps == 0):
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
          if should_stop:
            logging.info('Stopping Training.')
            break
      except errors.OutOfRangeError:
        # OutOfRangeError is thrown when epoch limit per
        # tf.train.limit_epochs is reached.
        logging.info('Caught OutOfRangeError. Stopping Training.')
      if logdir and sv.is_chief:
        logging.info('Finished training! Saving model to disk.')
        sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
def train(train_op,
          logdir,
          loss,
          logits,
          batch,
          endpoint,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None,
          trace_every_n_steps=None):
    """Runs a training loop using a TensorFlow supervisor.

    When the sync_optimizer is supplied, gradient updates are applied
    synchronously. Otherwise, gradient updates are applied asynchronously.

    Args:
      train_op: A `Tensor` that, when executed, will apply the gradients and
        return the loss value.
      logdir: The directory where training logs are written to. If None,
        model checkpoints and summaries will not be written.
      loss: Forwarded verbatim to `train_step_fn` — presumably the loss
        tensor; confirm against the `train_step_fn` in use.
      logits: Forwarded verbatim to `train_step_fn`.
      batch: Forwarded verbatim to `train_step_fn`.
      endpoint: Forwarded verbatim to `train_step_fn`.
      train_step_fn: The function to call in order to execute a single
        gradient step. Note: unlike stock slim, it is invoked here with nine
        arguments — (sess, train_op, endpoint, batch, logits, loss,
        global_step, number_of_steps, train_step_kwargs) — and must return
        `(total_loss, should_stop)`.
      train_step_kwargs: A dictionary which is passed to the `train_step_fn`.
        By default, two `Boolean`, scalar ops called "should_stop" and
        "should_log" are provided.
      log_every_n_steps: The frequency, in terms of global steps, that the
        loss and global step are logged.
      graph: The graph to pass to the supervisor. If no graph is supplied
        the default graph is used.
      master: The BNS name of the tensorflow master.
      is_chief: Specifies whether or not the training is being run by the
        primary replica during replica training.
      global_step: The `Tensor` representing the global step. If left as
        `None`, then slim.variables.get_or_create_global_step() is used.
      number_of_steps: The max number of gradient steps to take during
        training. If the value is left as None, training proceeds
        indefinitely.
      init_op: The initialization operation. If left to its default value,
        then the session is initialized by calling
        `tf.initialize_all_variables()`.
      init_feed_dict: A feed dictionary to use when executing the `init_op`.
      local_init_op: The local initialization operation. If left to its
        default value, then the session is initialized by calling
        `tf.initialize_local_variables()` and `tf.initialize_all_tables()`.
      init_fn: An optional callable to be executed after `init_op` is called.
        The callable must accept one argument, the session being initialized.
      ready_op: Operation to check if the model is ready to use. If left to
        its default value, then the session checks for readiness by calling
        `tf.report_uninitialized_variables()`.
      summary_op: The summary operation.
      save_summaries_secs: How often, in seconds, to save summaries.
      summary_writer: `SummaryWriter` to use. Can be `None` to indicate that
        no summaries should be written. If unset, we create a SummaryWriter.
      startup_delay_steps: The number of steps to wait for before beginning.
        Note that this must be 0 if a sync_optimizer is supplied.
      saver: Saver to save checkpoints. If None, a default one will be
        created and used.
      save_interval_secs: How often, in seconds, to save the model to
        `logdir`.
      sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
        argument is supplied, gradient updates will be synchronous. If left
        as `None`, gradient updates will be asynchronous.
      session_config: An instance of `tf.ConfigProto` that will be used to
        configure the `Session`. If left as `None`, the default will be used.
      trace_every_n_steps: produce and save a `Timeline` in Chrome trace
        format and add it to the summaries every `trace_every_n_steps`. If
        None, no trace information will be produced or saved.

    Returns:
      the value of the loss function after training.

    Raises:
      ValueError: if `train_op` is empty or if `startup_delay_steps` is
        non-zero when `sync_optimizer` is supplied, if `number_of_steps` is
        negative, or if `trace_every_n_steps` is not `None` and no `logdir`
        is provided.
    """
    if train_op is None:
        raise ValueError('train_op cannot be None.')

    # Options that write to disk are meaningless without a logdir.
    if logdir is None:
        if summary_op != _USE_DEFAULT:
            raise ValueError('Cannot provide summary_op because logdir=None')
        if saver is not None:
            raise ValueError('Cannot provide saver because logdir=None')
        if trace_every_n_steps is not None:
            raise ValueError('Cannot provide trace_every_n_steps because '
                             'logdir=None')

    if sync_optimizer and startup_delay_steps > 0:
        raise ValueError(
            'startup_delay_steps must be zero when sync_optimizer is supplied.'
        )

    if number_of_steps is not None and number_of_steps <= 0:
        raise ValueError(
            '`number_of_steps` must be either None or a positive number.')

    graph = graph or ops.get_default_graph()
    with graph.as_default():
        if global_step is None:
            global_step = variables.get_or_create_global_step()
        saver = saver or tf_saver.Saver()

        # Resolve every _USE_DEFAULT sentinel to the standard op.
        with ops.name_scope('init_ops'):
            if init_op == _USE_DEFAULT:
                init_op = tf_variables.initialize_all_variables()
            if ready_op == _USE_DEFAULT:
                ready_op = tf_variables.report_uninitialized_variables()
            if local_init_op == _USE_DEFAULT:
                local_init_op = control_flow_ops.group(
                    tf_variables.initialize_local_variables(),
                    data_flow_ops.initialize_all_tables())

        if summary_op == _USE_DEFAULT:
            summary_op = logging_ops.merge_all_summaries()

        if summary_writer == _USE_DEFAULT:
            summary_writer = supervisor.Supervisor.USE_DEFAULT

        cleanup_op = None

        if is_chief and sync_optimizer:
            if not isinstance(sync_optimizer,
                              sync_replicas_optimizer.SyncReplicasOptimizer):
                raise ValueError(
                    '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer'
                )

            # Need to create these BEFORE the supervisor finalizes the graph:
            with ops.control_dependencies([init_op]):
                init_tokens_op = sync_optimizer.get_init_tokens_op()
            init_op = init_tokens_op
            chief_queue_runner = sync_optimizer.get_chief_queue_runner()
            cleanup_op = sync_optimizer.get_clean_up_op()

        if train_step_kwargs == _USE_DEFAULT:
            with ops.name_scope('train_step'):
                train_step_kwargs = {}
                if number_of_steps:
                    should_stop_op = math_ops.greater_equal(
                        global_step, number_of_steps)
                else:
                    should_stop_op = constant_op.constant(False)
                train_step_kwargs['should_stop'] = should_stop_op
                train_step_kwargs['should_log'] = math_ops.equal(
                    math_ops.mod(global_step, log_every_n_steps), 0)
                if is_chief and trace_every_n_steps is not None:
                    train_step_kwargs['should_trace'] = math_ops.equal(
                        math_ops.mod(global_step, trace_every_n_steps), 0)
                    train_step_kwargs['logdir'] = logdir

        sv = supervisor.Supervisor(graph=graph,
                                   is_chief=is_chief,
                                   logdir=logdir,
                                   init_op=init_op,
                                   init_feed_dict=init_feed_dict,
                                   local_init_op=local_init_op,
                                   ready_op=ready_op,
                                   summary_op=summary_op,
                                   summary_writer=summary_writer,
                                   global_step=global_step,
                                   saver=saver,
                                   save_summaries_secs=save_summaries_secs,
                                   save_model_secs=save_interval_secs,
                                   init_fn=init_fn)

        if summary_writer is not None:
            train_step_kwargs['summary_writer'] = sv.summary_writer

        # Retry the whole session on AbortedError (distributed restarts).
        should_retry = True
        while should_retry:
            try:
                should_retry = False
                with sv.managed_session(master,
                                        start_standard_services=False,
                                        config=session_config) as sess:
                    logging.info('Starting Session.')
                    if is_chief:
                        if logdir:
                            sv.start_standard_services(sess)
                    elif startup_delay_steps > 0:
                        slim.learning._wait_for_step(
                            sess, global_step,
                            min(startup_delay_steps,
                                number_of_steps or sys.maxint))
                    sv.start_queue_runners(sess)
                    logging.info('Starting Queues.')
                    if is_chief and sync_optimizer:
                        sv.start_queue_runners(sess, [chief_queue_runner])
                    try:
                        while not sv.should_stop():
                            try:
                                total_loss, should_stop = train_step_fn(
                                    sess, train_op, endpoint, batch, logits,
                                    loss, global_step, number_of_steps,
                                    train_step_kwargs)
                            except tf.errors.OutOfRangeError:
                                # Input exhausted mid-step: checkpoint, clean
                                # up the sync optimizer, and leave the loop.
                                if logdir and sv.is_chief:
                                    sv.saver.save(sess, sv.save_path,
                                                  global_step=sv.global_step)
                                if sv.is_chief and cleanup_op is not None:
                                    sess.run(cleanup_op)
                                print('Training finished over one epoch....')
                                break
                            # NOTE(review): `global_step` here is the graph
                            # Variable, not a Python int — `global_step % 2`
                            # builds a tensor whose truthiness is undefined in
                            # graph mode, and `%f` cannot format a Variable.
                            # This logging branch looks broken; it should
                            # probably use a fetched step value.
                            if (global_step % 2):
                                print '%f steps finished, final step loss: %f ' % (
                                    global_step, total_loss)
                            if should_stop:
                                logging.info('Stopping Training.')
                                break
                        if logdir and sv.is_chief:
                            logging.info(
                                'Finished training! Saving model to disk.')
                            sv.saver.save(sess, sv.save_path,
                                          global_step=sv.global_step)
                    except tf.errors.OutOfRangeError:
                        # Epoch limit per tf.train.limit_epochs reached.
                        if logdir and sv.is_chief:
                            logging.info(
                                'Finished training! Saving model to disk.')
                            sv.saver.save(sess, sv.save_path,
                                          global_step=sv.global_step)
                        if sv.is_chief and cleanup_op is not None:
                            sess.run(cleanup_op)
                        print('Training finished over one epoch....')
                    except:
                        # NOTE(review): bare except — intentionally broad so
                        # the sync cleanup op always runs; the exception is
                        # re-raised below.
                        if sv.is_chief and cleanup_op is not None:
                            logging.info('About to execute sync_clean_up_op!')
                            sess.run(cleanup_op)
                        raise
            except errors.AbortedError:
                # Always re-run on AbortedError as it indicates a restart of
                # one of the distributed tensorflow servers.
                logging.info('Retrying training!')
                should_retry = True

    return total_loss
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients to global variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Every call applies the gradients to the *local* replica variables via
    the wrapped optimizer and accumulates the raw gradients into
    `self._grad_map`. Once `self._local_step` reaches a multiple of
    `self._period`, the accumulated gradients are averaged over the period,
    applied to the global (center) variables, the local variables are
    re-synchronized from the centers, and the accumulators are zeroed.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated. Note it is only incremented on period
        boundaries, inside `_Update_global_variables`.
      name: Optional name for the returned operation. Default to the name
        passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.
    """
    # Pairs with a None gradient contribute nothing; drop them up front.
    local_vars = [v for g, v in grads_and_vars if g is not None]
    grads = [g for g, v in grads_and_vars if g is not None]

    def _variable_creator(next_creator, collections, **kwargs):
        # Force any slot variables created by the wrapped optimizer into
        # LOCAL_VARIABLES (replacing GLOBAL_VARIABLES) so they stay
        # replica-local instead of being shared.
        if not collections:
            collections = [ops.GraphKeys.LOCAL_VARIABLES]
        elif ops.GraphKeys.GLOBAL_VARIABLES in collections:
            collections = list(collections)
            collections.append(ops.GraphKeys.LOCAL_VARIABLES)
            collections.remove(ops.GraphKeys.GLOBAL_VARIABLES)
        return next_creator(collections=collections, **kwargs)

    # theta = theta - lr * grad  — local step with the wrapped optimizer.
    with variable_scope.variable_creator_scope(_variable_creator):
        local_update_op = self._opt.apply_gradients(grads_and_vars)

    # a = a + grad  — accumulate raw gradients into per-variable buffers.
    update_ops = []
    update_ops.append(local_update_op)
    grad_vars = [self._grad_map[var] for var in local_vars]
    for g, grad_var in zip(grads, grad_vars):
        update_ops.append(state_ops.assign_add(grad_var, g))

    global_center_vars = [self._global_map[var] for var in local_vars]

    # update global variables.
    def _Update_global_variables():
        # Runs only every `self._period` local steps (see cond below).
        global_norm = []
        # a = a / t  — average the accumulated gradients over the period.
        for g in grad_vars:
            global_norm.append(state_ops.assign(g, g / self._period))
        # apply the averaged gradients to the global (center) variables.
        with ops.control_dependencies(global_norm):
            apply_global_op = self._opt.apply_gradients(
                zip(grad_vars, global_center_vars))
        # pull: copy refreshed centers back into the local replicas and
        # zero the accumulators for the next period.
        with ops.control_dependencies([apply_global_op]):
            update_ops = []
            if global_step:
                with ops.colocate_with(global_step):
                    update_ops.append(state_ops.assign_add(global_step, 1))
            for lvar in local_vars:
                g_val = self._global_map[lvar].read_value()
                update_ops.append(state_ops.assign(lvar, g_val))
            for grad_var in grad_vars:
                update_ops.append(
                    state_ops.assign(grad_var, array_ops.zeros_like(grad_var)))
            variable_update = control_flow_ops.group(*(update_ops))
        return variable_update

    # Count this local step, then trigger the global update on period
    # boundaries; the local/accumulate ops must complete first.
    local_update = state_ops.assign_add(self._local_step, 1,
                                        name='local_step_update').op

    with ops.control_dependencies([local_update]):
        condition = math_ops.equal(
            math_ops.mod(self._local_step, self._period), 0)
    with ops.control_dependencies(update_ops):
        conditional_update = control_flow_ops.cond(
            condition, _Update_global_variables, control_flow_ops.no_op)
    return conditional_update
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
    """Performs a single step of Beam Search Decoding.

    Args:
      time: Beam search time step, should start at 0. At time 0 we assume
        that all beams are equal and consider only the first beam for
        continuations.
      logits: Logits at the current time step. A tensor of shape
        `[batch_size, beam_width, vocab_size]`
      next_cell_state: The next state from the cell, e.g. an instance of
        AttentionWrapperState if the cell is attentional.
      beam_state: Current state of the beam search. An instance of
        `BeamSearchDecoderState`.
      batch_size: The batch size for this input.
      beam_width: Python int. The size of the beams.
      end_token: The int32 end token.
      length_penalty_weight: Float weight to penalize length. Disabled with
        0.0.

    Returns:
      A `(BeamSearchDecoderOutput, BeamSearchDecoderState)` pair holding the
      chosen scores / word ids / parent beam ids, and the new beam state.
    """
    static_batch_size = tensor_util.constant_value(batch_size)

    # Calculate the current lengths of the predictions.
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished

    # Calculate the total log probs for the new hypotheses.
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = nn_ops.log_softmax(logits)
    # Finished beams are masked so they can only "continue" via end_token.
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    total_probs = array_ops.expand_dims(beam_state.log_probs,
                                        2) + step_log_probs

    # Calculate the continuation lengths by adding to all continuing beams.
    vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
    # One-hot that is 0 at end_token and 1 elsewhere: picking the end token
    # does not lengthen a hypothesis.
    lengths_to_add = array_ops.one_hot(
        indices=array_ops.fill([batch_size, beam_width], end_token),
        depth=vocab_size,
        on_value=np.int64(0),
        off_value=np.int64(1),
        dtype=dtypes.int64)
    add_mask = math_ops.to_int64(math_ops.logical_not(previously_finished))
    lengths_to_add *= array_ops.expand_dims(add_mask, 2)
    new_prediction_lengths = (
        lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))

    # Calculate the scores for each beam (log prob plus length penalty).
    scores = _get_scores(
        log_probs=total_probs,
        sequence_lengths=new_prediction_lengths,
        length_penalty_weight=length_penalty_weight,
        dtype=logits.dtype)

    time = ops.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam.
    scores_shape = array_ops.shape(scores)
    # Flatten beams x vocab so top_k ranks all continuations jointly.
    scores_flat = array_ops.reshape(scores, [batch_size, -1])

    # Pick the next beams according to the specified successors function.
    next_beam_size = ops.convert_to_tensor(
        beam_width, dtype=dtypes.int32, name="beam_width")
    next_beam_scores, word_indices = nn_ops.top_k(scores_flat,
                                                  k=next_beam_size)

    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen
    # predictions.
    next_beam_probs = _tensor_gather_helper(
        gather_indices=word_indices,
        gather_from=total_probs,
        batch_size=batch_size,
        range_size=beam_width * vocab_size,
        gather_shape=[-1],
        name="next_beam_probs")
    # Note: just doing the following
    #   math_ops.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results
    # of the op which prevents capturing it with tfdbg debug ops.
    # Flat indices decompose as beam_id * vocab_size + word_id.
    raw_next_word_ids = math_ops.mod(
        word_indices, vocab_size, name="next_beam_word_ids")
    next_word_ids = math_ops.to_int32(raw_next_word_ids)
    next_beam_ids = math_ops.to_int32(
        word_indices / vocab_size, name="next_beam_parent_ids")

    # Append new ids to current predictions.
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = math_ops.logical_or(
        previously_finished,
        math_ops.equal(next_word_ids, end_token),
        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged.
    # 2. Beams that are now finished (EOS predicted) have their length
    #    increased by 1.
    # 3. Beams that are not yet finished have their length increased by 1.
    lengths_to_add = math_ops.to_int64(
        math_ops.logical_not(previously_finished))
    next_prediction_len = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=beam_state.lengths,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_prediction_len += lengths_to_add

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    # pylint: disable=g-long-lambda
    next_cell_state = nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]),
        next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = BeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished)

    output = BeamSearchDecoderOutput(
        scores=next_beam_scores,
        predicted_ids=next_word_ids,
        parent_ids=next_beam_ids)

    return output, next_state
def _train_deeplab_model(iterator, num_of_classes, ignore_label):
    """Builds the multi-clone DeepLab training graph.

    Args:
      iterator: An iterator of type tf.data.Iterator for images and labels.
      num_of_classes: Number of classes for the dataset.
      ignore_label: Ignore label for the dataset.

    Returns:
      train_tensor: A tensor to update the model variables.
      summary_op: An operation to log the summaries.
    """
    global_step = tf.train.get_or_create_global_step()

    learning_rate = train_utils.get_model_learning_rate(
        FLAGS.learning_policy, FLAGS.base_learning_rate,
        FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
        FLAGS.training_number_of_steps, FLAGS.learning_power,
        FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)

    clone_losses = []
    clone_grads = []

    # Forward pass: build one loss per clone, each pinned to its own GPU.
    # The first clone keeps the default (empty) name scope; variables are
    # created by clone 0 and reused by the rest.
    for clone_idx in range(FLAGS.num_clones):
        with tf.device('/gpu:%d' % clone_idx):
            scope_name = ('clone_%d' % clone_idx) if clone_idx else ''
            with tf.name_scope(scope_name) as scope:
                clone_losses.append(
                    _tower_loss(
                        iterator=iterator,
                        num_of_classes=num_of_classes,
                        ignore_label=ignore_label,
                        scope=scope,
                        reuse_variable=(clone_idx != 0)))

    if FLAGS.quantize_delay_step >= 0:
        if FLAGS.num_clones > 1:
            raise ValueError('Quantization doesn\'t support multi-clone yet.')
        tf.contrib.quantize.create_training_graph(
            quant_delay=FLAGS.quantize_delay_step)

    # Backward pass: compute each clone's gradients on the same device and
    # under the same name scope as its loss.
    for clone_idx in range(FLAGS.num_clones):
        with tf.device('/gpu:%d' % clone_idx):
            scope_name = ('clone_%d' % clone_idx) if clone_idx else ''
            with tf.name_scope(scope_name) as scope:
                clone_grads.append(
                    optimizer.compute_gradients(clone_losses[clone_idx]))

    with tf.device('/cpu:0'):
        grads_and_vars = _average_gradients(clone_grads)

        # Scale up the gradients of the bias and last-layer variables.
        last_layers = model.get_extra_layer_scopes(
            FLAGS.last_layers_contain_logits_only)
        grad_mult = train_utils.get_model_gradient_multipliers(
            last_layers, FLAGS.last_layer_gradient_multiplier)
        if grad_mult:
            grads_and_vars = tf.contrib.training.multiply_gradients(
                grads_and_vars, grad_mult)

        grad_updates = optimizer.apply_gradients(
            grads_and_vars, global_step=global_step)

        # Bundle the weight update with the UPDATE_OPS collection (e.g. the
        # batch-norm moving-average updates created by the model fn).
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)

        total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

        # Periodically print the loss to the terminal; mirrors the behavior
        # of tf.slim.summaries.
        should_log = math_ops.equal(
            math_ops.mod(global_step, FLAGS.log_steps), 0)
        total_loss = tf.cond(
            should_log,
            lambda: tf.Print(total_loss, [total_loss], 'Total loss is :'),
            lambda: total_loss)
        tf.summary.scalar('total_loss', total_loss)

        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Keep only summaries from scopes that do not start with 'clone_',
        # i.e. exclude the non-first towers.
        summary_op = tf.summary.merge_all(scope='(?!clone_)')

    return train_tensor, summary_op
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
    """Performs a single, heavily customized step of Beam Search Decoding.

    This variant deviates from the stock implementation:
    (a) fixed score masks (`mask`, `mask2`, `no_stop`) suppress specific
        vocabulary positions,
    (b) at time 0 the top-k continuations of the first and last beams are
        taken separately, and
    (c) from time 1 on, scores are summed over two groups before top_k.

    # NOTE(review): several hard-coded constants below (k=5, tiling
    # [1, 5, 1], offsets 9 * vocab_size and 5 * vocab_size, the reshape to
    # [batch_size, 2, -1]) appear to assume beam_width == 10 split into two
    # groups of 5 — confirm before reusing with other widths.

    Args:
      time: Beam search time step, should start at 0.
      logits: Logits at the current time step. A tensor of shape
        `[batch_size, beam_width, vocab_size]`
      next_cell_state: The next state from the cell, e.g. an instance of
        AttentionWrapperState if the cell is attentional.
      beam_state: Current state of the beam search. An instance of
        `BeamSearchDecoderState`.
      batch_size: The batch size for this input.
      beam_width: Python int. The size of the beams.
      end_token: The int32 end token.
      length_penalty_weight: Float weight to penalize length. Disabled with
        0.0.

    Returns:
      A `(BeamSearchDecoderOutput, BeamSearchDecoderState)` pair.
    """
    static_batch_size = tensor_util.constant_value(batch_size)

    # Calculate the current lengths of the predictions.
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished

    # Calculate the total log probs for the new hypotheses.
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = nn_ops.log_softmax(logits)
    # (observed at trace time: shape=(?, 10, 56136))
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    total_probs = array_ops.expand_dims(beam_state.log_probs,
                                        2) + step_log_probs

    # Calculate the continuation lengths by adding to all continuing beams.
    vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
    lengths_to_add = array_ops.one_hot(
        indices=array_ops.tile(
            array_ops.reshape(end_token, [1, 1]), [batch_size, beam_width]),
        depth=vocab_size,
        on_value=constant_op.constant(0, dtype=dtypes.int64),
        off_value=constant_op.constant(1, dtype=dtypes.int64),
        dtype=dtypes.int64)
    add_mask = (1 - math_ops.to_int64(previously_finished))
    lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add
    new_prediction_lengths = (
        lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))

    # Calculate the scores for each beam.
    scores = _get_scores(
        log_probs=total_probs,
        sequence_lengths=new_prediction_lengths,
        length_penalty_weight=length_penalty_weight)

    # Additive masks over the vocab axis: dtype.min at the masked positions
    # effectively removes those words from consideration.
    # NOTE(review): the literal lists here are shorter than vocab_size; this
    # relies on tf.constant's fill/broadcast-with-shape behavior — confirm
    # which positions are actually masked.
    scores_mask = tf.constant([step_log_probs.dtype.min, 0],
                              dtype=dtypes.float32,
                              shape=[vocab_size],
                              name='mask')
    scores_masked = tf.add(scores, scores_mask)
    scores_mask2 = tf.constant([0, 0, 0, 0, 0, step_log_probs.dtype.min, 0],
                               dtype=dtypes.float32,
                               shape=[vocab_size],
                               name='mask2')
    scores_masked = tf.add(scores_mask2, scores_masked)

    def new_scores(scores_masked):
        # Additionally masks the stop position (see NOTE above).
        scores_no_stop = tf.constant([0, 0, step_log_probs.dtype.min, 0],
                                     dtype=dtypes.float32,
                                     shape=[vocab_size],
                                     name='no_stop')
        scores = tf.add(scores_masked, scores_no_stop)
        return scores

    # constrain the length
    # NOTE(review): with `time < 0` the true branch is dead code — the
    # commented `time < 9` suggests it was used for length constraints.
    scores = control_flow_ops.cond(
        time < 0,
        lambda: new_scores(scores_masked),
        lambda: scores_masked)
    # scores: [batch_size, beam_width, vocab_size]

    time = ops.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam.
    scores_shape = array_ops.shape(scores)
    # Split the beams into 2 groups and flatten each group's beams x vocab.
    scores_to_flat_1 = array_ops.reshape(scores, [batch_size, 2, -1])
    print("scores_to_flat_1", scores_to_flat_1)
    scores_to_0 = scores[:, 0]
    scores_to_1 = scores[:, -1]
    scores_to_flat_2 = tf.concat([scores_to_0, scores_to_1], 1)
    scores_flat = control_flow_ops.cond(
        time > 0,
        lambda: scores_to_flat_1,
        lambda: array_ops.reshape(scores_to_flat_2, [batch_size, 2, -1]))
    num_available_beam = control_flow_ops.cond(
        time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]),
        lambda: math_ops.reduce_prod(scores_shape[2:]))

    # Pick the next beams according to the specified successors function.
    next_beam_size = math_ops.minimum(
        ops.convert_to_tensor(
            beam_width, dtype=dtypes.int32, name="beam_width"),
        num_available_beam)

    # Word-prior machinery (prior over candidate words from get_words /
    # prior_scores); currently computed but only scores_sum is used below.
    input_words = get_words()
    return_list = prior_scores(input_words)
    return_array = np.array(return_list)
    return_tensor = tf.convert_to_tensor(return_array)
    tiling = [1, 5, 1]
    prior_mask = tf.tile(tf.expand_dims(return_tensor, 1), tiling)
    prior_mask = tf.cast(prior_mask, tf.float32)
    prior_mask = array_ops.reshape(prior_mask, [batch_size, -1])
    # Sum the scores across each of the two beam groups.
    scores_sum = tf.reduce_sum(scores_to_flat_1, 1)
    # NOTE(review): `prior_mask` is never added to `scores_sum`; the
    # previously-attempted `tf.add(scores_sum, prior_mask)` logic remains
    # disabled.

    def cal_scores_indices(scores_to_0, scores_to_1):
        # time == 0: rank the first and last beams independently.
        next_beam_scores_1, word_indices_1 = nn_ops.top_k(scores_to_0, k=5)
        print("ori next_beam_scores_1,word_indices_1", next_beam_scores_1)
        print("ori word_indices_1", word_indices_1)
        next_beam_scores_2, word_indices_2 = nn_ops.top_k(scores_to_1, k=5)
        next_beam_scores = tf.concat(
            [next_beam_scores_1, next_beam_scores_2], 1)
        # Offset the second group so indices decode to parent beam 9.
        word_indices = tf.concat(
            [word_indices_1, word_indices_2 + 9 * vocab_size], 1)
        return next_beam_scores, word_indices

    def cal_scores_indices_t1(scores_final, next_beam_size):
        # time > 0: rank the summed group scores; duplicate the winners into
        # both halves of the beam (second half offset to parent beam 5).
        next_beam_scores_1, word_indices_1 = nn_ops.top_k(scores_final, k=5)
        print("next_beam_scores_1", next_beam_scores_1)
        print("word_indices_1", word_indices_1)
        next_beam_scores = tf.concat(
            [next_beam_scores_1, next_beam_scores_1], 1)
        word_indices = tf.concat(
            [word_indices_1, word_indices_1 + 5 * vocab_size], 1)
        return next_beam_scores, word_indices

    next_beam_scores, word_indices = control_flow_ops.cond(
        time > 0, lambda: cal_scores_indices_t1(scores_sum, next_beam_size),
        lambda: cal_scores_indices(scores_to_0, scores_to_1))

    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen
    # predictions.
    next_beam_probs = _tensor_gather_helper(
        gather_indices=word_indices,
        gather_from=total_probs,
        batch_size=batch_size,
        range_size=beam_width * vocab_size,
        gather_shape=[-1],
        name="next_beam_probs")
    # Note: just doing the following
    #   math_ops.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results
    # of the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = math_ops.mod(
        word_indices, vocab_size, name="next_beam_word_ids")
    next_word_ids = math_ops.to_int32(raw_next_word_ids)
    next_beam_ids = math_ops.to_int32(
        word_indices / vocab_size, name="next_beam_parent_ids")

    # Append new ids to current predictions.
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = math_ops.logical_or(
        previously_finished,
        math_ops.equal(next_word_ids, end_token),
        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged.
    # 2. Beams that are now finished (EOS predicted) remain unchanged.
    # 3. Beams that are not yet finished have their length increased by 1.
    lengths_to_add = math_ops.to_int64(
        math_ops.not_equal(next_word_ids, end_token))
    lengths_to_add = (1 - math_ops.to_int64(next_finished)) * lengths_to_add
    next_prediction_len = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=beam_state.lengths,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_prediction_len += lengths_to_add

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    # pylint: disable=g-long-lambda
    next_cell_state = nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]),
        next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = BeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished)
    print('next_beam_probs', next_beam_probs)
    output = BeamSearchDecoderOutput(
        scores=next_beam_scores,
        predicted_ids=next_word_ids,
        parent_ids=next_beam_ids)
    return output, next_state
def testFilterRange(self):
    """Filtering range(4) by `x % 3 != 2` drops only the element 2."""

    def keep_element(x):
        # Keep every element whose value mod 3 differs from 2.
        return math_ops.not_equal(math_ops.mod(x, 3), 2)

    filtered = self.apply_filter(dataset_ops.Dataset.range(4), keep_element)
    self.assertDatasetProduces(filtered, expected_output=[0, 1, 3])
def train_y(
    train_op,
    logdir,
    train_step_fn=train_step_y,
    train_step_kwargs=_USE_DEFAULT,
    # YY: extra kwargs so evaluation args can be threaded into the step fn.
    train_step_kwargs_extra=None,
    log_every_n_steps=1,
    eval_ops=None,  # YY: metric update ops run on the training set.
    num_evals=0,  # YY: number of evaluation batches for the training set.
    eval_ops_valid=None,  # YY: metric update ops run on the validation set.
    num_evals_valid=0,  # YY: number of evaluation batches for validation.
    graph=None,
    master='',
    is_chief=True,
    global_step=None,
    number_of_steps=None,
    init_op=_USE_DEFAULT,
    init_feed_dict=None,
    local_init_op=_USE_DEFAULT,
    init_fn=None,
    ready_op=_USE_DEFAULT,
    summary_op=_USE_DEFAULT,
    save_summaries_secs=600,
    summary_writer=_USE_DEFAULT,
    startup_delay_steps=0,
    saver=None,
    save_interval_secs=600,
    # sync_optimizer=None,  # YY: removed; this variant is async-only.
    session_config=None,
    trace_every_n_steps=None):
  """Runs a training loop using a TensorFlow supervisor (modified slim.train).

  Modified copy of `slim.learning.train`: adds `train_step_kwargs_extra` and
  periodic streaming-metric evaluation on the training and validation sets;
  the sync-optimizer path has been removed, so gradient updates are always
  applied asynchronously.

  Args:
    train_op: A `Tensor` that, when executed, applies the gradients and
      returns the loss value.
    logdir: The directory where training logs are written to. If None, model
      checkpoints and summaries will not be written.
    train_step_fn: The function to call in order to execute a single gradient
      step. Must take exactly four arguments: the current session, the
      `train_op` `Tensor`, a global step `Tensor` and a dictionary.
    train_step_kwargs: A dictionary which is passed to `train_step_fn`. By
      default, two `Boolean`, scalar ops called "should_stop" and
      "should_log" are provided.
    train_step_kwargs_extra: Optional dict merged into `train_step_kwargs`
      (YY addition; entries override nothing — they are added after the
      defaults are built).
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step are logged.
    eval_ops: Optional ops evaluated after training finishes, on the
      training set (YY addition).
    num_evals: How many times to run `eval_ops` (YY addition).
    eval_ops_valid: Optional ops evaluated after training finishes, on the
      validation set (YY addition).
    num_evals_valid: How many times to run `eval_ops_valid` (YY addition).
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The address of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the
      primary replica during replica training.
    global_step: The `Tensor` representing the global step. If left as
      `None`, then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during
      training. If the value is left as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling
      `tf.global_variables_initializer()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its default
      value, then the session is initialized by calling
      `tf.local_variables_initializer()` and `tf.tables_initializer()`.
    init_fn: An optional callable to be executed after `init_op` is called.
      The callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to its
      default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    summary_writer: `SummaryWriter` to use. Can be `None` to indicate that no
      summaries should be written. If unset, we create a SummaryWriter.
    startup_delay_steps: The number of steps to wait for before beginning.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    trace_every_n_steps: produce and save a `Timeline` in Chrome trace format
      and add it to the summaries every `trace_every_n_steps`. If None, no
      trace information will be produced or saved.

  Returns:
    the value of the loss function after training.

  Raises:
    ValueError: if `train_op` is empty, if `number_of_steps` is negative, or
      if `summary_op`/`saver`/`trace_every_n_steps` are provided while
      `logdir` is None.
  """
  if train_op is None:
    raise ValueError('train_op cannot be None.')

  # Writing anything (summaries, checkpoints, traces) requires a logdir.
  if logdir is None:
    if summary_op != _USE_DEFAULT:
      raise ValueError('Cannot provide summary_op because logdir=None')
    if saver is not None:
      raise ValueError('Cannot provide saver because logdir=None')
    if trace_every_n_steps is not None:
      raise ValueError('Cannot provide trace_every_n_steps because '
                       'logdir=None')

  if number_of_steps is not None and number_of_steps <= 0:
    raise ValueError(
        '`number_of_steps` must be either None or a positive number.')

  graph = graph or ops.get_default_graph()
  with graph.as_default():
    if global_step is None:
      global_step = variables.get_or_create_global_step()
    saver = saver or tf_saver.Saver()

    with ops.name_scope('init_ops'):
      if init_op == _USE_DEFAULT:
        init_op = tf_variables.global_variables_initializer()

      if ready_op == _USE_DEFAULT:
        ready_op = tf_variables.report_uninitialized_variables()

      if local_init_op == _USE_DEFAULT:
        # YY: keep a handle on the local-variables initializer alone so it
        # can be re-run before each evaluation pass (resets streaming
        # metrics), in addition to grouping it into the session's
        # local_init_op.
        y_local_init_op = tf_variables.local_variables_initializer()
        local_init_op = control_flow_ops.group(
            y_local_init_op,
            # tf_variables.local_variables_initializer(),
            data_flow_ops.tables_initializer())

    ready_for_local_init_op = None

    if summary_op == _USE_DEFAULT:
      summary_op = summary.merge_all()

    if summary_writer == _USE_DEFAULT:
      summary_writer = supervisor.Supervisor.USE_DEFAULT

    if train_step_kwargs == _USE_DEFAULT:
      with ops.name_scope('train_step'):
        train_step_kwargs = {}

        if number_of_steps:
          should_stop_op = math_ops.greater_equal(global_step,
                                                  number_of_steps)
        else:
          should_stop_op = constant_op.constant(False)
        train_step_kwargs['should_stop'] = should_stop_op
        train_step_kwargs['should_log'] = math_ops.equal(
            math_ops.mod(global_step, log_every_n_steps), 0)
        if is_chief and trace_every_n_steps is not None:
          train_step_kwargs['should_trace'] = math_ops.equal(
              math_ops.mod(global_step, trace_every_n_steps), 0)
          train_step_kwargs['logdir'] = logdir
        # YY: merge caller-supplied extras into the step kwargs.
        if train_step_kwargs_extra is not None:
          for name in train_step_kwargs_extra:
            train_step_kwargs[name] = train_step_kwargs_extra[name]

    sv = supervisor.Supervisor(graph=graph,
                               is_chief=is_chief,
                               logdir=logdir,
                               init_op=init_op,
                               init_feed_dict=init_feed_dict,
                               local_init_op=local_init_op,
                               ready_for_local_init_op=ready_for_local_init_op,
                               ready_op=ready_op,
                               summary_op=summary_op,
                               summary_writer=summary_writer,
                               global_step=global_step,
                               saver=saver,
                               save_summaries_secs=save_summaries_secs,
                               save_model_secs=save_interval_secs,
                               init_fn=init_fn)

    if summary_writer is not None:
      train_step_kwargs['summary_writer'] = sv.summary_writer

    # YY: expose the bare local-variables initializer to the step function
    # (used to reset streaming metrics between evaluations).
    train_step_kwargs[
        'local_init_op'] = y_local_init_op  # tf_variables.local_variables_initializer()

  # Retry loop: AbortedError indicates a restart of one of the distributed
  # tensorflow servers, so the whole managed session is re-created.
  should_retry = True
  while should_retry:
    try:
      should_retry = False
      with sv.managed_session(master,
                              start_standard_services=False,
                              config=session_config) as sess:
        logging.info('Starting Session.')
        if is_chief:
          if logdir:
            sv.start_standard_services(sess)
        elif startup_delay_steps > 0:
          # Non-chief workers optionally wait for the chief to make progress.
          _wait_for_step(
              sess, global_step,
              min(startup_delay_steps, number_of_steps or sys.maxint))
        sv.start_queue_runners(sess)
        logging.info('Starting Queues.')
        try:
          while not sv.should_stop():
            total_loss, should_stop = train_step_fn(sess, train_op,
                                                    global_step,
                                                    train_step_kwargs)
            if should_stop:
              logging.info('Stopping Training.')
              break
        except errors.OutOfRangeError:
          # OutOfRangeError is thrown when epoch limit per
          # tf.train.limit_epochs is reached.
          logging.info('Caught OutOfRangeError. Stopping Training.')
        if logdir and sv.is_chief:
          logging.info('Finished training! Saving model to disk.')
          sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
          # YY: final streaming-metric evaluation on the training set.
          if eval_ops is not None:
            logging.info(
                '********* Starting evaluation on Training set, at ' +
                time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            evaluation_y.evaluate_loop_slim_streaming_metrics(
                sess, num_evals, eval_ops)
            summary_str = sess.run(summary_op)
            sv.summary_computed(sess, summary_str)
            logging.info(
                '********* Finished evaluation on Training set, at ' +
                time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
          # YY: final streaming-metric evaluation on the validation set.
          if eval_ops_valid is not None:
            logging.info(
                '********* Starting evaluation on Validation set, at ' +
                time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            np_global_step = training_util.global_step(sess, global_step)
            evaluation_y.evaluate_loop_slim_streaming_metrics(
                sess, num_evals_valid, eval_ops_valid)
            summary_str = sess.run(summary_op)
            sv.summary_writer.add_summary(summary_str, np_global_step)
            logging.info(
                '********* Finished evaluation on Validation set, at ' +
                time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    except errors.AbortedError:
      # Always re-run on AbortedError as it indicates a restart of one of
      # the distributed tensorflow servers.
      logging.info('Retrying training!')
      should_retry = True

  return total_loss
def sample_symbols_new(logits, log_probs, finished, lengths, time):
  """One beam-search expansion step: pick the top-k continuations per batch.

  Fix vs. previous version: removed two dead statements — the flattened
  `sample_scores` tensor and the `next_log_probs = scores / length_penalty`
  assignment, which was immediately overwritten by the `array_ops.gather`
  recomputation below and therefore never used.

  Relies on module-level globals: `eos_id`, `vocab_size`, `beam_size`,
  `batch_size`, `alpha`, and the helpers `advanced_log_softmax`,
  `finished_beam_one_entry_bias`, `expand_to_beam_size`,
  `compute_batch_indices`, `gather_states`.

  Args:
    logits: [batch_size * beam_size, target_vocab_size] decoder outputs.
    log_probs: [batch_size * beam_size,] accumulated log-probabilities.
    finished: [batch_size * beam_size,] bool, beams already ended with EOS.
    lengths: [batch_size * beam_size,] current decoded lengths.
    time: scalar decoding step (0 for the first step).

  Returns:
    Tuple of (word_ids, beam_ids, next_log_probs, next_lengths,
    ret_log_probs, ret_sample_ids, length_penalty), all gathered/flattened
    for the selected beams.
  """
  # [batch_size * beam_size,]
  prev_finished_float = math_ops.to_float(finished)

  # [batch_size * beam_size,]
  prev_log_probs = log_probs

  # [batch_size * beam_size, target_vocab_size], negative (log-softmax).
  probs = advanced_log_softmax(logits)

  # Mask finished beams so they can only continue with EOS:
  # [target_vocab_size,] = [float_min, ..., float_min, 0 at eos_id].
  finished_beam_bias = finished_beam_one_entry_bias(
      on_entry=eos_id, num_entries=vocab_size)
  # Outer-product expansion to [batch_size * beam_size, target_vocab_size];
  # the bias only applies to rows whose beam is already finished.
  finished_beam_bias = expand_to_beam_size(
      finished_beam_bias, beam_size * batch_size, axis=0)
  finished_beam_bias *= array_ops.expand_dims(prev_finished_float, 1)

  # Zero out probabilities of finished beams, then add the EOS-only bias.
  probs = probs * array_ops.expand_dims(
      1. - prev_finished_float, 1) + finished_beam_bias

  # Accumulate: new log-prob of each (beam, word) candidate.
  log_probs = probs + array_ops.expand_dims(prev_log_probs, 1)

  # Finished beams do not grow; live beams get one token longer.
  lengths = lengths + 1 - math_ops.to_int32(finished)

  # GNMT-style length penalty, pre-inverted via the -alpha exponent so that
  # multiplying by it is equivalent to dividing by ((5 + len) / 6) ** alpha.
  length_penalty = math_ops.pow(
      ((5.0 + math_ops.to_float(lengths)) / 6.0), -alpha)
  scores = log_probs * array_ops.expand_dims(length_penalty, axis=1)

  # Flatten candidates per batch: [batch_size, beam_size * vocab_size].
  scores = array_ops.reshape(array_ops.reshape(scores, [-1]),
                             [batch_size, -1])
  ret_log_probs = array_ops.reshape(array_ops.reshape(log_probs, [-1]),
                                    [batch_size, -1])

  # At time == 0 every beam holds the same prefix, so only the first beam's
  # vocab_size candidates are considered to avoid duplicate expansions.
  scores_flat = control_flow_ops.cond(
      ops.convert_to_tensor(time) > 0,
      lambda: scores,
      lambda: array_ops.slice(scores, [0, 0], [-1, vocab_size]))

  # Keep the top beam_size candidates per batch element.
  sample_scores, sample_ids = nn_ops.top_k(scores_flat, k=beam_size)
  ret_sample_ids = array_ops.reshape(sample_ids, [-1])
  # Flatten to [batch_size * beam_size,].
  sample_ids = array_ops.reshape(sample_ids, [-1])

  # Candidate index -> word id within the vocabulary.
  word_ids = math_ops.mod(sample_ids, vocab_size)

  # batch_pos: [batch_size, beam_size] = [[0,0,...],[1,1,...],...].
  batch_pos = compute_batch_indices(batch_size, beam_size)

  # Candidate index -> originating beam, offset into the global beam axis.
  beam_ids = math_ops.div(sample_ids, vocab_size) \
      + array_ops.reshape(batch_pos * beam_size, [-1])

  # Gather per-beam state for the selected parents.
  length_penalty = gather_states(length_penalty, beam_ids)
  next_lengths = gather_states(lengths, beam_ids)

  # Recover the (unpenalized) log-probs of the selected candidates directly
  # from the flattened log_probs tensor.
  log_probs_flat = array_ops.reshape(log_probs, [-1])
  log_probs_index = array_ops.reshape(
      batch_pos, [-1]) * beam_size * vocab_size + sample_ids
  next_log_probs = array_ops.gather(log_probs_flat, log_probs_index)

  return (word_ids, beam_ids, next_log_probs, next_lengths, ret_log_probs,
          ret_sample_ids, length_penalty)
def even(x):
  """Return an elementwise boolean tensor: True where `x` is even."""
  remainder = math_ops.mod(x, 2)
  return math_ops.equal(remainder, 0)
def train(train_op,
          logdir,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=None,
          init_fn=None,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None):
  """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronous.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must have take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a
      dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`.
      By default, two `Boolean`, scalar ops called "should_stop" and
      "should_log" are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step and logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The BNS name of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the
      primary replica during replica training.
    global_step: The `Tensor` representing the global step. If left as
      `None`, then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during
      training. If the value is left as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling
      `tf.initialize_all_variables()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If None, then the
      session is initialized by calling `tf.initialize_local_variables()`
      and `tf.initialize_all_tables()`.
    init_fn: An optional callable to be executed after `init_op` is called.
      The callable must accept one argument, the session being initialized.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    startup_delay_steps: The number of steps to wait for before beginning.
      Note that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If none, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
      argument is supplied, gradient updates will be synchronous. If left as
      `None`, gradient updates will be asynchronous.

  Returns:
    the value of the loss function after training.

  Raises:
    ValueError: if `train_op` is empty or if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, or if `number_of_steps` is
      negative.
  """
  if train_op is None:
    raise ValueError('train_op cannot be None.')

  # Synchronous replicas must all start together; a startup delay would
  # deadlock the aggregation.
  if sync_optimizer and startup_delay_steps > 0:
    raise ValueError(
        'startup_delay_steps must be zero when sync_optimizer is supplied.')

  if number_of_steps is not None and number_of_steps <= 0:
    raise ValueError(
        '`number_of_steps` must be either None or a positive number.')

  graph = graph or ops.get_default_graph()
  with graph.as_default():
    if global_step is None:
      global_step = variables.get_or_create_global_step()
    saver = saver or tf_saver.Saver()

    if init_op == _USE_DEFAULT:
      init_op = tf_variables.initialize_all_variables()

    if summary_op == _USE_DEFAULT:
      summary_op = logging_ops.merge_all_summaries()

    cleanup_op = None

    if is_chief and sync_optimizer:
      if not isinstance(sync_optimizer,
                        sync_replicas_optimizer.SyncReplicasOptimizer):
        raise ValueError(
            '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer')

      # Need to create these BEFORE the supervisor finalizes the graph:
      with ops.control_dependencies([init_op]):
        init_tokens_op = sync_optimizer.get_init_tokens_op()
      init_op = init_tokens_op
      chief_queue_runner = sync_optimizer.get_chief_queue_runner()
      cleanup_op = sync_optimizer.get_clean_up_op()

    if train_step_kwargs == _USE_DEFAULT:
      train_step_kwargs = {}

      if number_of_steps:
        should_stop_op = math_ops.greater_equal(global_step, number_of_steps)
      else:
        should_stop_op = constant_op.constant(False)
      train_step_kwargs['should_stop'] = should_stop_op
      train_step_kwargs['should_log'] = math_ops.equal(
          math_ops.mod(global_step, log_every_n_steps), 0)

  sv = supervisor.Supervisor(graph=graph,
                             is_chief=is_chief,
                             logdir=logdir,
                             init_op=init_op,
                             init_feed_dict=init_feed_dict,
                             local_init_op=local_init_op,
                             summary_op=summary_op,
                             global_step=global_step,
                             saver=saver,
                             save_summaries_secs=save_summaries_secs,
                             save_model_secs=save_interval_secs,
                             init_fn=init_fn)

  with sv.managed_session(master, start_standard_services=False) as sess:
    if is_chief:
      sv.start_standard_services(sess)
    elif not is_chief and startup_delay_steps > 0:
      # Non-chief workers optionally wait for the chief to make progress.
      _wait_for_step(sess, global_step,
                     min(startup_delay_steps, number_of_steps or sys.maxint))
    sv.start_queue_runners(sess)

    # The chief additionally drives the sync optimizer's aggregation queue.
    if is_chief and sync_optimizer:
      sv.start_queue_runners(sess, [chief_queue_runner])

    try:
      while not sv.should_stop():
        total_loss, should_stop = train_step_fn(
            sess, train_op, global_step, train_step_kwargs)
        if should_stop:
          break
    finally:
      if sv.is_chief and cleanup_op is not None:
        # Abandon the sync-replica token queues so other replicas can exit.
        sess.run(cleanup_op)
      # This waits for service threads to finish.
      sv.Stop()

    if sv.is_chief:
      logging.info('Finished training! Saving model to disk.')
      sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

  return total_loss
def main(_):
  """Builds the training graph (with an in-graph validation branch) and runs
  slim training.

  Modified slim `train_image_classifier` main: in addition to the standard
  clone-based training pipeline, it constructs a validation input pipeline
  and streaming metrics, and passes custom `train_step_kwargs` so the
  training loop can run validation every `FLAGS.val_every_n_steps` steps.
  """
  if not FLAGS.dataset_dir:
    raise ValueError(
        'You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
    # Separate split used for the in-graph validation branch.
    dataset_val = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, 'validation', FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        is_training=True)
    # Same architecture in inference mode for validation (shared weights
    # via variable_scope reuse below).
    network_fn_val = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        is_training=False)

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name,
        is_training=True,
        flipLR=FLAGS.flipLR,
        random_scale=FLAGS.random_scale,
        is_windowed=FLAGS.is_windowed)
    image_preprocessing_fn_val = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name,
        is_training=False,
        flipLR=FLAGS.flipLR,
        random_scale=FLAGS.random_scale,
        is_windowed=FLAGS.is_windowed)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size,
                                     train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)

      ############################################
      # Create a provider for the validation set #
      ############################################
      provider_val = slim.dataset_data_provider.DatasetDataProvider(
          dataset_val,
          shuffle=True,
          common_queue_capacity=2 * FLAGS.batch_size_val,
          common_queue_min=FLAGS.batch_size_val)
      [image_val, label_val] = provider_val.get(['image', 'label'])
      label_val -= FLAGS.labels_offset

      eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

      image_val = image_preprocessing_fn_val(image_val, eval_image_size,
                                             eval_image_size)

      images_val, labels_val = tf.train.batch(
          [image_val, label_val],
          batch_size=FLAGS.batch_size_val,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size_val)

    ###############################
    # Define the model (training) #
    ###############################
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      # 'my_scope' is reused (reuse=True) by the validation branch below so
      # both branches share the same weights.
      with tf.variable_scope('my_scope'):
        logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'],
            labels,
            label_smoothing=FLAGS.label_smoothing,
            weights=0.4,
            scope='aux_loss')
      slim.losses.softmax_cross_entropy(
          logits, labels, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn,
                                        [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                   first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(
          tf.summary.scalar('sparsity/' + end_point,
                            tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    if FLAGS.quantize_delay >= 0:
      tf.contrib.quantize.create_training_graph(
          quant_delay=FLAGS.quantize_delay)

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples,
                                               global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    # and returns a train_tensor and summary_op
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones, optimizer, var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    # Group all updates behind the loss so fetching train_tensor runs them.
    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or
    # _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    #################################
    # Define the model (validation) #
    #################################
    # reuse=True: share weights with the training clones built above.
    with tf.variable_scope('my_scope', reuse=True):
      logits_val, _ = network_fn_val(images_val)
      predictions_val = tf.argmax(logits_val, 1)
      labels_val = tf.squeeze(labels_val)

    # Define the metrics:
    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
        'Accuracy':
            slim.metrics.streaming_accuracy(predictions_val, labels_val),
        'Recall_5':
            slim.metrics.streaming_recall_at_k(logits_val, labels_val, 5)
    })

    for name, value in names_to_values.items():
      summary_name = 'eval/%s' % name
      op = tf.summary.scalar(summary_name, value, collections=[])
      # tf.Print also logs the metric value whenever the summary is fetched.
      op = tf.Print(op, [value], summary_name)
      tf.add_to_collection('summaries', op)

    # Gather validation summaries
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    # Create a non-default saver so we don't delete all the old
    # checkpoints.
    my_saver = tf_saver.Saver(
        max_to_keep=FLAGS.max_checkpoints_to_keep,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
    )

    # Create a non-default dictionary of options for train_step_fn.
    # This is a hack that lets us pass everything we need to run
    # evaluation, into the training loop function.
    # NOTE(review): local imports kept here deliberately — they shadow
    # nothing above and mirror what slim.learning.train builds internally.
    from tensorflow.python.framework import ops
    from tensorflow.python.framework import constant_op
    from tensorflow.python.ops import math_ops

    with ops.name_scope('train_step'):
      train_step_kwargs = {}

      if FLAGS.max_number_of_steps:
        should_stop_op = math_ops.greater_equal(global_step,
                                                FLAGS.max_number_of_steps)
      else:
        should_stop_op = constant_op.constant(False)
      train_step_kwargs['should_stop'] = should_stop_op
      if FLAGS.log_every_n_steps > 0:
        train_step_kwargs['should_log'] = math_ops.equal(
            math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)
      # Run the validation metrics every val_every_n_steps global steps.
      train_step_kwargs['should_val'] = math_ops.equal(
          math_ops.mod(global_step, FLAGS.val_every_n_steps), 0)
      train_step_kwargs['eval_op'] = list(names_to_updates.values())

    # Debug output of the stop condition op (left in intentionally).
    print(should_stop_op)

    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None,
        saver=my_saver,
        train_step_fn=learning_biasCNN.train_step_fn,
        train_step_kwargs=train_step_kwargs)
def _shard_indices(self, keys):
  """Map each key to a shard index in `[0, num_shards)` as int32.

  String keys are hashed into buckets; integer keys are sharded by
  modulo. Output dtype is always int32.
  """
  if self._key_dtype == dtypes.string:
    raw_indices = string_ops.string_to_hash_bucket_fast(
        keys, self._num_shards)
  else:
    raw_indices = math_ops.mod(keys, self._num_shards)
  return math_ops.cast(raw_indices, dtypes.int32)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to global variables.

  This is the second part of `minimize()`. It returns an `Operation` that
  applies gradients.

  Applies the wrapped optimizer's update, bumps the local step, and every
  `self._period` local steps pulls the local/global center variables back
  into sync (elastic-averaging style update).

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    global_step: Optional `Variable` to increment by one after the variables
      have been updated.
    name: Optional name for the returned operation. Default to the name
      passed to the `Optimizer` constructor.

  Returns:
    An `Operation` that applies the specified gradients. If `global_step`
    was not None, that operation also increments `global_step`.

  Raises:
    TypeError: If `grads_and_vars` is malformed.
    ValueError: If none of the variables have gradients.
  """
  # Snapshot the global-variable set around the inner apply_gradients so we
  # can detect any slot variables the wrapped optimizer creates (e.g. Adam
  # betas), which it registers as GLOBAL by default.
  global_old = set(n.op.name for n in variables.global_variables())
  apply_updates = self._opt.apply_gradients(grads_and_vars)
  global_new = set(n.op.name for n in variables.global_variables())
  with ops.control_dependencies([apply_updates]):
    local_update = state_ops.assign_add(
        self._local_step, 1, name='local_step_update').op

  # this is for place the variables created by optimizer to local collection
  # e.g., AdamOptimizer will create beta as global variables
  def _adjust_optimizer_variable_collection(opt_vars):
    """Move `opt_vars` from the GLOBAL_VARIABLES to the LOCAL_VARIABLES
    collection, mutating the graph's collection in place."""
    g = ops.get_default_graph()
    idx = 0
    # NOTE: iterates by index because entries are deleted in place; `idx`
    # only advances when the current entry is kept.
    for _ in range(len(g._collections[ops.GraphKeys.GLOBAL_VARIABLES])):
      var = g.get_collection_ref(ops.GraphKeys.GLOBAL_VARIABLES)[idx]
      name = var.op.name
      if name in opt_vars:
        ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var)
        del g.get_collection_ref(ops.GraphKeys.GLOBAL_VARIABLES)[idx]
      else:
        idx += 1

  _adjust_optimizer_variable_collection(global_new - global_old)

  # update global variables.
  def _Update_global_variables():
    """Elastic-averaging sync: pull local vars toward the global center and
    push the global center toward the local vars by `self._moving_rate`."""
    local_vars = [v for g, v in grads_and_vars if g is not None]
    global_center_vars = [self._global_map[var] for var in local_vars]
    local_center_vars = [self._local_map[var] for var in local_vars]
    local_center_vars_update = []
    # First refresh the local copies of the global center variables.
    for lvar, var in zip(local_center_vars, global_center_vars):
      local_center_vars_update.append(lvar.assign(var))
    update_ops = []
    differences = []
    # Differences are computed only after the local centers are refreshed.
    with ops.control_dependencies(local_center_vars_update):
      for v, lv in zip(local_vars, local_center_vars):
        with ops.device(v.device):
          differences.append(math_ops.subtract(v, lv))
    # local var -= moving_rate * (local - center)
    for lvar, diff in zip(local_vars, differences):
      with ops.device(lvar.device):
        update_ops.append(
            state_ops.assign_sub(
                lvar, math_ops.multiply(self._moving_rate, diff)))
    # global center += moving_rate * (local - center)
    for var, diff in zip(global_center_vars, differences):
      with ops.device(var.device):
        update_ops.append(
            state_ops.assign_add(
                var, math_ops.multiply(self._moving_rate, diff)))
    # global_step only advances on sync steps, not on every local step.
    if global_step:
      with ops.colocate_with(global_step):
        update_ops.append(state_ops.assign_add(global_step, 1))
    variable_update = control_flow_ops.group(*(update_ops))
    return variable_update

  # Run the global sync only every `self._period` local steps.
  with ops.control_dependencies([local_update]):
    condition = math_ops.equal(
        math_ops.mod(self._local_step, self._period), 0)
    conditional_update = control_flow_ops.cond(
        condition, _Update_global_variables, control_flow_ops.no_op)
  return conditional_update
def _apply_gradient(self, grad, var, indices=None):
  """The main function to update a variable.

  Args:
    grad: A Tensor containing gradient to apply.
    var: A Tensor containing the variable to update.
    indices: An array of integers, for sparse update.

  Returns:
    Updated variable var = var - learning_rate * preconditioner * grad

  If the gradient is dense, var and grad have the same shape.
  If the update is sparse, then the first dimension of the gradient and var
  may differ, others are all the same. In this case the indices array
  provides the set of indices of the variable which are to be updated with
  each row of the gradient.
  """
  global_step = self._global_step + 1

  # Update accumulated weighted average of gradients
  gbar = self.get_slot(var, "gbar")
  gbar_decay_t = GetParam(self._gbar_decay, global_step)
  gbar_weight_t = GetParam(self._gbar_weight, global_step)
  if indices is not None:
    # Note - the sparse update is not easily implemented, since the
    # algorithm needs all indices of gbar to be updated
    # if mat_gbar_decay != 1 or mat_gbar_decay != 0.
    # One way to make mat_gbar_decay = 1 is by rescaling.
    # If we want the update:
    #         G_{t+1} = a_{t+1} G_t + b_{t+1} w_t
    # define:
    #         r_{t+1} = a_{t+1} * r_t
    #         h_t = G_t / r_t
    # Then:
    #         h_{t+1} = h_t + (b_{t+1} / r_{t+1}) * w_t
    # So we get the mat_gbar_decay = 1 as desired.
    # We can implement this in a future version as needed.
    # However we still need gbar_decay = 0, otherwise all indices
    # of the variable will need to be updated.
    if self._gbar_decay != 0.0:
      tf_logging.warning("Not applying momentum for variable: %s" % var.name)
    gbar_updated = grad
  else:
    # Dense case: standard exponential moving average of the gradient.
    gbar_updated = self._weighted_average(gbar, self._gbar_decay,
                                          gbar_decay_t,
                                          gbar_weight_t * grad)

  # Update the preconditioners and compute the preconditioned gradient
  shape = var.get_shape()
  mat_g_list = []
  # One statistics matrix "Gbar_i" per tensor dimension.
  for i in range(len(shape)):
    mat_g_list.append(self.get_slot(var, "Gbar_" + str(i)))
  mat_gbar_decay_t = GetParam(self._mat_gbar_decay, global_step)
  mat_gbar_weight_t = GetParam(self._mat_gbar_weight, global_step)

  preconditioned_grad = gbar_updated
  v_rank = len(mat_g_list)
  # The exponent is split evenly across the v_rank preconditioners.
  neg_alpha = -GetParam(self._alpha, global_step) / v_rank
  svd_interval = GetParam(self._svd_interval, global_step)
  precond_update_interval = GetParam(self._precond_update_interval,
                                     global_step)
  for i, mat_g in enumerate(mat_g_list):
    # axes is the list of indices to reduce - everything but the current i.
    axes = list(range(i)) + list(range(i + 1, v_rank))
    if shape[i] <= self._max_matrix_size:
      # If the tensor size is sufficiently small perform full Shampoo update
      # Note if precond_update_interval > 1 and mat_gbar_decay_t != 1, this
      # is not strictly correct. However we will use it for now, and
      # fix if needed. (G_1 = aG + bg ==> G_n = a^n G + (1+a+..+a^{n-1})bg)

      # pylint: disable=g-long-lambda,cell-var-from-loop
      mat_g_updated = control_flow_ops.cond(
          math_ops.mod(global_step, precond_update_interval) < 1,
          lambda: self._update_mat_g(
              mat_g, grad, axes, mat_gbar_decay_t,
              mat_gbar_weight_t * precond_update_interval, i),
          lambda: mat_g)

      mat_g_updated = mat_g_updated / float(shape[i].value)

      if self._svd_interval == 1:
        mat_h = self._compute_power(var, mat_g_updated, shape[i], neg_alpha)
      else:
        # Recompute the (expensive) matrix power only every svd_interval
        # steps; otherwise reuse the cached "H_i" slot.
        mat_h = control_flow_ops.cond(
            math_ops.mod(global_step, svd_interval) < 1,
            lambda: self._compute_power(var, mat_g_updated, shape[
                i], neg_alpha, "H_" + str(i)),
            lambda: self.get_slot(var, "H_" + str(i)))

      # mat_h is a square matrix of size d_i x d_i
      # preconditioned_grad is a d_i x ... x d_n x d_0 x ... d_{i-1} tensor
      # After contraction with a d_i x d_i tensor
      # it becomes a d_{i+1} x ... x d_n x d_0 x ... d_i tensor
      # (the first dimension is contracted out, and the second dimension of
      # mat_h is appended). After going through all the indices, it becomes
      # a d_0 x ... x d_n tensor again.
      preconditioned_grad = math_ops.tensordot(preconditioned_grad, mat_h,
                                               axes=([0], [0]),
                                               name="precond_" + str(i))
    else:
      # Tensor size is too large -- perform diagonal Shampoo update
      # Only normalize non-vector cases.
      if axes:
        normalizer = 1.0 if indices is not None else float(shape[i].value)
        grad_outer = math_ops.reduce_sum(grad * grad, axis=axes) / normalizer
      else:
        grad_outer = grad * grad

      if i == 0 and indices is not None:
        # Sparse rows: only the gathered rows of the diagonal statistics
        # are touched; requires mat_gbar_decay == 1 (no global decay).
        assert self._mat_gbar_decay == 1.0
        mat_g_updated = state_ops.scatter_add(
            mat_g, indices, mat_gbar_weight_t * grad_outer)
        mat_h = math_ops.pow(
            array_ops.gather(mat_g_updated, indices) + self._epsilon,
            neg_alpha)
      else:
        mat_g_updated = self._weighted_average(
            mat_g, self._mat_gbar_decay, mat_gbar_decay_t,
            mat_gbar_weight_t * grad_outer)
        mat_h = math_ops.pow(mat_g_updated + self._epsilon, neg_alpha)

      # Need to do the transpose to ensure that the tensor becomes
      # a d_{i+1} x ... x d_n x d_0 x ... d_i tensor as described above.
      preconditioned_grad = array_ops.transpose(
          preconditioned_grad, perm=list(range(1, v_rank)) + [0]) * mat_h

  # Update the variable based on the Shampoo update
  learning_rate_t = GetParam(self._learning_rate, global_step)
  if indices is not None:
    var_updated = state_ops.scatter_add(
        var, indices, -learning_rate_t * preconditioned_grad)
  else:
    var_updated = state_ops.assign_sub(
        var, learning_rate_t * preconditioned_grad)
  return var_updated
def defaults_two():
  """Build a graph-level branch: `multiply` when `x` is even, else `divide`."""
  x_is_even = math_ops.equal(math_ops.mod(x, 2), 0)
  return control_flow_ops.cond(x_is_even, multiply, divide, name="cond_mult")
def rotate_transpose(x, shift, name="rotate_transpose"):
  """Circularly moves dims left or right.

  Effectively identical to:

  ```python
  numpy.transpose(x, numpy.roll(numpy.arange(len(x.shape)), shift))
  ```

  When the rank of `x` or the value of `shift` is not statically known,
  additional graph-runtime ops are built to compute the permutation; this
  can entail moving data from GPU to CPU.

  Example:

  ```python
  x = ...  # Tensor of shape [1, 2, 3, 4].
  rotate_transpose(x, -1)  # result shape: [2, 3, 4, 1]
  rotate_transpose(x, -2)  # result shape: [3, 4, 1, 2]
  rotate_transpose(x,  1)  # result shape: [4, 1, 2, 3]
  rotate_transpose(x,  2)  # result shape: [3, 4, 1, 2]
  rotate_transpose(x, 7) == rotate_transpose(x, 3)
  rotate_transpose(x, -7) == rotate_transpose(x, -3)
  ```

  Args:
    x: `Tensor`.
    shift: `Tensor`. Number of dimensions to transpose left (shift<0) or
      transpose right (shift>0).
    name: `String`. The name to give this op.

  Returns:
    rotated_x: Input `Tensor` with dimensions circularly rotated by shift.

  Raises:
    TypeError: if shift is not integer type.
  """
  with ops.name_scope(name, values=[x, shift]):
    x = ops.convert_to_tensor(x, name="x")
    shift = ops.convert_to_tensor(shift, name="shift")
    # We do not assign back to preserve constant-ness.
    check_ops.assert_integer(shift)
    shift_value_static = tensor_util.constant_value(shift)
    ndims = x.get_shape().ndims
    if ndims is not None and shift_value_static is not None:
      # Fully static case: compute the permutation in numpy at graph-build
      # time and emit a single transpose (or no op at all).
      if ndims < 2:
        return x
      # Reduce the shift modulo ndims while preserving its sign, so the
      # np.roll below sees an equivalent small shift.
      shift_value_static = np.sign(shift_value_static) * (
          abs(shift_value_static) % ndims)
      if shift_value_static == 0:
        return x
      perm = np.roll(np.arange(ndims), shift_value_static)
      return array_ops.transpose(x, perm=perm)
    else:
      # Consider if we always had a positive shift, and some specified
      # direction.
      # When shifting left we want the new array:
      #   last(x, n-shift) + first(x, shift)
      # and if shifting right then we want:
      #   last(x, shift) + first(x, n-shift)
      # Observe that last(a) == slice(a, n) and first(a) == slice(0, a).
      # Also, we can encode direction and shift as one: direction * shift.
      # Combining these facts, we have:
      #   a = cond(shift<0, -shift, n-shift)
      #   last(x, n-a) + first(x, a) == x[a:n] + x[0:a]
      # Finally, we transform shift by modulo length so it can be specified
      # independently from the array upon which it operates (like python).
      ndims = array_ops.rank(x)
      shift = array_ops.where(math_ops.less(shift, 0),
                              math_ops.mod(-shift, ndims),
                              ndims - math_ops.mod(shift, ndims))
      first = math_ops.range(0, shift)
      last = math_ops.range(shift, ndims)
      perm = array_ops.concat((last, first), 0)
      return array_ops.transpose(x, perm=perm)
def f(x1, x2):
  """Return x1 mod x2, first promoting boolean tensors to int8."""
  if x1.dtype != dtypes.bool:
    # Non-boolean inputs pass straight through to mod.
    return math_ops.mod(x1, x2)
  # Boolean mod is undefined; both operands must be bool together and are
  # promoted to int8 before the operation.
  assert x2.dtype == dtypes.bool
  lhs = math_ops.cast(x1, dtypes.int8)
  rhs = math_ops.cast(x2, dtypes.int8)
  return math_ops.mod(lhs, rhs)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This contains most of the synchronization implementation and also wraps
  the apply_gradients() from the real optimizer. The chief work updates
  global variables.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      compute_gradients().
    global_step: Optional Variable to increment by one after the variables
      have been updated.
    name: Optional name for the returned operation. Default to the name
      passed to the Optimizer constructor.

  Returns:
    A conditional 'Operation' that update both local and global variables
    or just local variables

  Raises:
    ValueError: If the grads_and_vars is empty.
    ValueError: If global step is not provided, the staleness cannot be
      checked.
  """
  # update local variables
  if not grads_and_vars:
    raise ValueError("Must supply at least one variable")
  if global_step is None:
    raise ValueError("Global step is required")

  apply_updates = self._opt.apply_gradients(grads_and_vars)
  # Count a local step only after the wrapped optimizer has applied the
  # gradients.
  with ops.control_dependencies([apply_updates]):
    local_update = state_ops.assign_add(
        self._local_step, 1, name="local_step_update").op

  # update global variables.
  def _update_global_variables():  # pylint: disable=missing-docstring
    local_vars = [v for g, v in grads_and_vars if g is not None]
    global_vars = [self._local_2_global[v] for v in local_vars]
    # sync queue: used to make non-chief workers block until the chief has
    # finished updating the global variables.
    with ops.colocate_with(global_step):
      sync_queue = data_flow_ops.FIFOQueue(
          -1, [dtypes.bool], shapes=[[]], shared_name="sync_queue")
    train_ops = []
    aggregated_vars = []
    with ops.name_scope(None, self._name + "/global"):
      for var, gvar in zip(local_vars, global_vars):
        # pylint: disable=protected-access
        # Each worker pushes its local value into a shared accumulator;
        # take_grad averages across self._num_worker contributions.
        with ops.device(gvar.device):
          if isinstance(var._ref(), ops.Tensor):
            var_accum = data_flow_ops.ConditionalAccumulator(
                var.dtype,
                shape=var.get_shape(),
                shared_name=gvar.name + "/var_accum")
            train_ops.append(
                var_accum.apply_grad(var._ref(), local_step=global_step))
            aggregated_vars.append(var_accum.take_grad(self._num_worker))
          else:
            raise ValueError("Unknown local variable type!")
          self._accumulator_list.append((var_accum, gvar.device))
    # chief worker updates global vars and enqueues tokens to the sync queue
    if self._is_chief:
      update_ops = []
      with ops.control_dependencies(train_ops):
        for avg_var, gvar in zip(aggregated_vars, global_vars):
          with ops.device(gvar.device):
            update_ops.append(state_ops.assign(gvar, avg_var))
        with ops.device(global_step.device):
          update_ops.append(state_ops.assign_add(global_step, 1))
      # One token per non-chief worker releases them from their dequeue.
      with ops.control_dependencies(update_ops), ops.device(
          global_step.device):
        tokens = array_ops.fill([self._num_worker - 1],
                                constant_op.constant(False))
        sync_op = sync_queue.enqueue_many(tokens)
    else:
      # Non-chief workers wait here until the chief enqueues a token.
      with ops.control_dependencies(train_ops), ops.device(
          global_step.device):
        sync_op = sync_queue.dequeue()

    # After synchronization, refresh local variables from the new globals.
    with ops.control_dependencies([sync_op]):
      local_update_op = self._local_vars_update(local_vars)
    return local_update_op

  # Only run the global synchronization every `_interval_steps` local steps.
  with ops.control_dependencies([local_update]):
    condition = math_ops.equal(
        math_ops.mod(self._local_step, self._interval_steps), 0)
    conditional_update = control_flow_ops.cond(
        condition, _update_global_variables, control_flow_ops.no_op)

  # The chief must reset each accumulator's global step at startup so stale
  # contributions are rejected.
  chief_init_ops = []
  for accum, dev in self._accumulator_list:
    with ops.device(dev):
      chief_init_ops.append(
          accum.set_global_step(global_step, name="SetGlobalStep"))
  self._chief_init_op = control_flow_ops.group(*(chief_init_ops))

  return conditional_update
def main(_):
  """Build the slim image-classification training graph and run training."""
  #tf.disable_v2_behavior() ###
  tf.compat.v1.disable_eager_execution()
  tf.compat.v1.enable_resource_variables()

  # Enable habana bf16 conversion pass
  if FLAGS.dtype == 'bf16':
    os.environ['TF_BF16_CONVERSION'] = flags.FLAGS.bf16_config_path
    FLAGS.precision = 'bf16'
  else:
    os.environ['TF_BF16_CONVERSION'] = "0"

  if FLAGS.use_horovod:
    hvd_init()

  if not FLAGS.dataset_dir:
    raise ValueError(
        'You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                          FLAGS.dataset_split_name,
                                          FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        is_training=True)

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=True,
        use_grayscale=FLAGS.use_grayscale)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      # Shift labels so class ids start at zero when the dataset reserves
      # an offset (e.g. a background class).
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size,
                                     train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)

    ####################
    # Define the model #
    ####################
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      if 'AuxLogits' in end_points:
        # Auxiliary head (e.g. Inception) contributes a down-weighted loss.
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'],
            labels,
            label_smoothing=FLAGS.label_smoothing,
            weights=0.4,
            scope='aux_loss')
      slim.losses.softmax_cross_entropy(
          logits, labels, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn,
                                        [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                   first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(
          tf.summary.scalar('sparsity/' + end_point,
                            tf.nn.zero_fraction(x)))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    #if FLAGS.quantize_delay >= 0:
    #  quantize.create_training_graph(quant_delay=FLAGS.quantize_delay) #for debugging!!

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples,
                                               global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(
          variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    # and returns a train_tensor and summary_op
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones, optimizer, var_list=variables_to_train)
    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    update_op = tf.group(*update_ops)
    # train_tensor evaluates the loss but only after every update op
    # (gradients, batch-norm, EMA) has run.
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or
    # _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    if horovod_enabled():
      hvd.broadcast_global_variables(0)

    ###########################
    # Kicks off the training. #
    ###########################
    with dump_callback():
      with logger.benchmark_context(FLAGS):
        eps1 = ExamplesPerSecondKerasHook(FLAGS.log_every_n_steps,
                                          output_dir=FLAGS.train_dir,
                                          batch_size=FLAGS.batch_size)

        write_hparams_v1(
            eps1.writer, {
                'batch_size': FLAGS.batch_size,
                **{x: getattr(FLAGS, x) for x in FLAGS}
            })

        train_step_kwargs = {}
        if FLAGS.max_number_of_steps:
          should_stop_op = math_ops.greater_equal(
              global_step, FLAGS.max_number_of_steps)
        else:
          should_stop_op = constant_op.constant(False)
        train_step_kwargs['should_stop'] = should_stop_op
        if FLAGS.log_every_n_steps > 0:
          train_step_kwargs['should_log'] = math_ops.equal(
              math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)

        eps1.on_train_begin()
        train_step_kwargs['EPS'] = eps1

        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            train_step_fn=train_step1,
            train_step_kwargs=train_step_kwargs,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            summary_writer=None,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
def train(train_op,
          logdir,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None,
          session_wrapper=None,
          trace_every_n_steps=None,
          ignore_live_threads=False):
  """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronous.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to. If None,
      model checkpoints and summaries will not be written.
    train_step_fn: The function to call in order to execute a single
      gradient step. The function must have take exactly four arguments:
      the current session, the `train_op` `Tensor`, a global step `Tensor`
      and a dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`.
      By default, two `Boolean`, scalar ops called "should_stop" and
      "should_log" are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the
      loss and global step are logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The address of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the
      primary replica during replica training.
    global_step: The `Tensor` representing the global step. If left as
      `None`, then training_util.get_or_create_global_step(), that is,
      tf.contrib.framework.global_step() is used.
    number_of_steps: The max number of gradient steps to take during
      training, as measured by 'global_step': training will stop if
      global_step is greater than 'number_of_steps'. If the value is left
      as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value,
      then the session is initialized by calling
      `tf.global_variables_initializer()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its
      default value, then the session is initialized by calling
      `tf.local_variables_initializer()` and `tf.tables_initializer()`.
    init_fn: An optional callable to be executed after `init_op` is called.
      The callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to
      its default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    summary_writer: `SummaryWriter` to use. Can be `None` to indicate that
      no summaries should be written. If unset, we create a SummaryWriter.
    startup_delay_steps: The number of steps to wait for before beginning.
      Note that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be
      created and used.
    save_interval_secs: How often, in seconds, to save the model to
      `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer, or a
      list of them. If the argument is supplied, gradient updates will be
      synchronous. If left as `None`, gradient updates will be
      asynchronous.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    session_wrapper: A function that takes a `tf.Session` object as the
      only argument and returns a wrapped session object that has the same
      methods that the original object has, or `None`. Iff not `None`, the
      wrapped object will be used for training.
    trace_every_n_steps: produce and save a `Timeline` in Chrome trace
      format and add it to the summaries every `trace_every_n_steps`. If
      None, no trace information will be produced or saved.
    ignore_live_threads: If `True` ignores threads that remain running
      after a grace period when stopping the supervisor, instead of raising
      a RuntimeError.

  Returns:
    the value of the loss function after training.

  Raises:
    ValueError: if `train_op` is empty or if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, if `number_of_steps` is
      negative, or if `trace_every_n_steps` is not `None` and no `logdir`
      is provided.
  """
  if train_op is None:
    raise ValueError('train_op cannot be None.')

  # Writing anything (summaries, checkpoints, traces) requires a logdir.
  if logdir is None:
    if summary_op != _USE_DEFAULT:
      raise ValueError('Cannot provide summary_op because logdir=None')
    if saver is not None:
      raise ValueError('Cannot provide saver because logdir=None')
    if trace_every_n_steps is not None:
      raise ValueError('Cannot provide trace_every_n_steps because '
                       'logdir=None')

  # Normalize a single sync optimizer to a one-element list.
  if isinstance(sync_optimizer,
                sync_replicas_optimizer.SyncReplicasOptimizer):
    sync_optimizer = [sync_optimizer]
  if sync_optimizer is not None and startup_delay_steps > 0:
    raise ValueError(
        'startup_delay_steps must be zero when sync_optimizer is supplied.')

  if number_of_steps is not None and number_of_steps <= 0:
    raise ValueError(
        '`number_of_steps` must be either None or a positive number.')

  graph = graph or ops.get_default_graph()
  with graph.as_default():
    if global_step is None:
      global_step = training_util.get_or_create_global_step()
    saver = saver or tf_saver.Saver()

    if sync_optimizer is not None:
      for opt in sync_optimizer:
        if not isinstance(opt,
                          sync_replicas_optimizer.SyncReplicasOptimizer):
          raise ValueError(
              '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer.')

    with ops.name_scope('init_ops'):
      if init_op == _USE_DEFAULT:
        init_op = variables.global_variables_initializer()

      if ready_op == _USE_DEFAULT:
        ready_op = variables.report_uninitialized_variables()

      if local_init_op == _USE_DEFAULT:
        local_init_op = control_flow_ops.group(
            variables.local_variables_initializer(),
            lookup_ops.tables_initializer())

      if sync_optimizer is not None and isinstance(sync_optimizer, list):
        # With sync replicas, the chief and the workers run different
        # local-init paths; both must still run the base local_init_op
        # first (control dependency below).
        with ops.control_dependencies(
            [local_init_op] if local_init_op is not None else []):
          if is_chief:
            local_init_op = control_flow_ops.group(
                *[opt.chief_init_op for opt in sync_optimizer])
          else:
            local_init_op = control_flow_ops.group(
                *[opt.local_step_init_op for opt in sync_optimizer])
        ready_for_local_init_op = control_flow_ops.group(
            *[opt.ready_for_local_init_op for opt in sync_optimizer])
      else:
        ready_for_local_init_op = None

    if summary_op == _USE_DEFAULT:
      summary_op = summary.merge_all()

    if summary_writer == _USE_DEFAULT:
      summary_writer = supervisor.Supervisor.USE_DEFAULT

    if is_chief and sync_optimizer is not None:
      # Need to create these BEFORE the supervisor finalizes the graph:
      init_tokens_op = [opt.get_init_tokens_op() for opt in sync_optimizer]
      chief_queue_runner = [
          opt.get_chief_queue_runner() for opt in sync_optimizer]

    if train_step_kwargs == _USE_DEFAULT:
      with ops.name_scope('train_step'):
        train_step_kwargs = {}

        if number_of_steps:
          should_stop_op = math_ops.greater_equal(global_step,
                                                  number_of_steps)
        else:
          should_stop_op = constant_op.constant(False)
        train_step_kwargs['should_stop'] = should_stop_op
        if log_every_n_steps > 0:
          train_step_kwargs['should_log'] = math_ops.equal(
              math_ops.mod(global_step, log_every_n_steps), 0)
        if is_chief and trace_every_n_steps is not None:
          train_step_kwargs['should_trace'] = math_ops.equal(
              math_ops.mod(global_step, trace_every_n_steps), 0)
          train_step_kwargs['logdir'] = logdir

    sv = supervisor.Supervisor(
        graph=graph,
        is_chief=is_chief,
        logdir=logdir,
        init_op=init_op,
        init_feed_dict=init_feed_dict,
        local_init_op=local_init_op,
        ready_for_local_init_op=ready_for_local_init_op,
        ready_op=ready_op,
        summary_op=summary_op,
        summary_writer=summary_writer,
        global_step=global_step,
        saver=saver,
        save_summaries_secs=save_summaries_secs,
        save_model_secs=save_interval_secs,
        init_fn=init_fn)

    if summary_writer is not None:
      train_step_kwargs['summary_writer'] = sv.summary_writer

    total_loss = None
    should_retry = True
    # Retry loop: AbortedError (e.g. a restarted distributed server) causes
    # one more pass; every other exit leaves the loop.
    while should_retry:
      try:
        should_retry = False
        with sv.managed_session(
            master, start_standard_services=False,
            config=session_config) as sess:
          logging.info('Starting Session.')
          if session_wrapper is not None:
            logging.info('Wrapping session with wrapper function: %s',
                         session_wrapper)
            sess = session_wrapper(sess)
          if is_chief:
            if logdir:
              sv.start_standard_services(sess)
          elif startup_delay_steps > 0:
            # (use sys.maxsize because sys.maxint doesn't exist in Python 3)
            _wait_for_step(
                sess, global_step,
                min(startup_delay_steps, number_of_steps or sys.maxsize))
          threads = sv.start_queue_runners(sess)
          logging.info('Starting Queues.')
          if is_chief and sync_optimizer is not None:
            sv.start_queue_runners(sess, chief_queue_runner)
            sess.run(init_tokens_op)
          try:
            while not sv.should_stop():
              total_loss, should_stop = train_step_fn(
                  sess, train_op, global_step, train_step_kwargs)
              if should_stop:
                logging.info('Stopping Training.')
                sv.request_stop()
                break
          except errors.OutOfRangeError as e:
            # OutOfRangeError is thrown when epoch limit per
            # tf.train.limit_epochs is reached.
            logging.info('Caught OutOfRangeError. Stopping Training. %s', e)
          if logdir and sv.is_chief:
            logging.info('Finished training! Saving model to disk.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
            sv.stop(
                threads,
                close_summary_writer=True,
                ignore_live_threads=ignore_live_threads)
      except errors.AbortedError:
        # Always re-run on AbortedError as it indicates a restart of one of
        # the distributed tensorflow servers.
        logging.info('Retrying training!')
        should_retry = True

    return total_loss
def filter_fn(elem_index, _):
  """Keep only elements assigned to this worker by round-robin sharding."""
  shard = math_ops.mod(elem_index, task_spec.num_workers)
  return math_ops.equal(shard, task_spec.index)
def train(train_op,
          logdir,
          metric_op=None,
          metric_collection_name=None,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_samples=None,
          number_of_steps=None,
          number_of_epochs=None,
          batch_size=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None,
          trace_every_n_steps=None):
  """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronously.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to. If None, model
      checkpoints and summaries will not be written.
    metric_op: A `Tensor` that, when executed, will update the
      streaming_metrics ops.
    metric_collection_name: The name associated with the metric_op.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a
      dictionary. (When `metric_op` is supplied it is called with two extra
      positional arguments: `metric_op` and `reset_op`.)
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By
      default, two `Boolean`, scalar ops called "should_stop" and "should_log"
      are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step are logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The address of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the primary
      replica during replica training.
    global_step: The `Tensor` representing the global step. If left as `None`,
      then slim.variables.get_or_create_global_step() is used.
    number_of_samples: The total number of samples in the training set; used
      together with `batch_size` to derive `num_batches_per_epoch`.
    number_of_steps: The max number of gradient steps to take during training,
      as measured by 'global_step': training will stop if global_step is
      greater than 'number_of_steps'. If the value is left as None, training
      proceeds indefinitely.
    number_of_epochs: The total number of epochs per training.
    batch_size: The number of samples in each batch.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling `tf.global_variables_initializer()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its default
      value, then the session is initialized by calling
      `tf.local_variables_initializer()` and `tf.tables_initializer()`.
    init_fn: An optional callable to be executed after `init_op` is called. The
      callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to its
      default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    summary_writer: `SummaryWriter` to use. Can be `None` to indicate that no
      summaries should be written. If unset, we create a SummaryWriter.
    startup_delay_steps: The number of steps to wait for before beginning. Note
      that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
      argument is supplied, gradient updates will be synchronous. If left as
      `None`, gradient updates will be asynchronous.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    trace_every_n_steps: produce and save a `Timeline` in Chrome trace format
      and add it to the summaries every `trace_every_n_steps`. If None, no
      trace information will be produced or saved.

  Returns:
    the value of the loss function after training (or `None` if no train step
    was executed).

  Raises:
    ValueError: if `train_op` is empty or if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, if `number_of_steps` is
      negative, or if `trace_every_n_steps` is not `None` and no `logdir` is
      provided.
  """
  # Check if the calculation of some metrics is desired.
  if metric_op is not None:
    # Check the necessary requirements.
    # BUG FIX: the messages used to be truncated ('... cannot be No').
    if metric_collection_name is None:
      raise ValueError('metric_collection_name must be fed and cannot be None')
    if number_of_samples is None:
      raise ValueError('number_of_samples must be fed and cannot be None')
    if number_of_steps is None:
      raise ValueError('number_of_steps must be fed and cannot be None')
    if number_of_epochs is None:
      raise ValueError('number_of_epochs must be fed and cannot be None')
    if batch_size is None:
      raise ValueError('batch_size must be fed and cannot be None')

  if train_op is None:
    raise ValueError('train_op cannot be None.')

  if logdir is None:
    if summary_op != _USE_DEFAULT:
      raise ValueError('Cannot provide summary_op because logdir=None')
    if saver is not None:
      raise ValueError('Cannot provide saver because logdir=None')
    if trace_every_n_steps is not None:
      raise ValueError('Cannot provide trace_every_n_steps because '
                       'logdir=None')

  if sync_optimizer is not None and startup_delay_steps > 0:
    raise ValueError(
        'startup_delay_steps must be zero when sync_optimizer is supplied.')

  if number_of_steps is not None and number_of_steps <= 0:
    raise ValueError(
        '`number_of_steps` must be either None or a positive number.')

  graph = graph or ops.get_default_graph()
  with graph.as_default():
    if global_step is None:
      global_step = variables.get_or_create_global_step()
    saver = saver or tf_saver.Saver()

    with ops.name_scope('init_ops'):
      if init_op == _USE_DEFAULT:
        init_op = tf_variables.global_variables_initializer()

      if ready_op == _USE_DEFAULT:
        ready_op = tf_variables.report_uninitialized_variables()

      if local_init_op == _USE_DEFAULT:
        local_init_op = control_flow_ops.group(
            tf_variables.local_variables_initializer(),
            lookup_ops.tables_initializer())

      if sync_optimizer is not None and isinstance(
          sync_optimizer, sync_replicas_optimizer.SyncReplicasOptimizer):
        with ops.control_dependencies(
            [local_init_op] if local_init_op is not None else []):
          if is_chief:
            local_init_op = sync_optimizer.chief_init_op
          else:
            local_init_op = sync_optimizer.local_step_init_op
        ready_for_local_init_op = sync_optimizer.ready_for_local_init_op
      else:
        ready_for_local_init_op = None

    if summary_op == _USE_DEFAULT:
      summary_op = summary.merge_all()

    if summary_writer == _USE_DEFAULT:
      summary_writer = supervisor.Supervisor.USE_DEFAULT

    if is_chief and sync_optimizer is not None:
      if not isinstance(sync_optimizer,
                        (sync_replicas_optimizer.SyncReplicasOptimizer)):
        raise ValueError(
            '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer.')
      # Need to create these BEFORE the supervisor finalizes the graph:
      init_tokens_op = sync_optimizer.get_init_tokens_op()
      chief_queue_runner = sync_optimizer.get_chief_queue_runner()

    if train_step_kwargs == _USE_DEFAULT:
      with ops.name_scope('train_step'):
        train_step_kwargs = {}

        if number_of_steps:
          should_stop_op = math_ops.greater_equal(global_step, number_of_steps)
        else:
          should_stop_op = constant_op.constant(False)
        train_step_kwargs['should_stop'] = should_stop_op
        train_step_kwargs['should_log'] = math_ops.equal(
            math_ops.mod(global_step, log_every_n_steps), 0)
        if is_chief and trace_every_n_steps is not None:
          train_step_kwargs['should_trace'] = math_ops.equal(
              math_ops.mod(global_step, trace_every_n_steps), 0)
          train_step_kwargs['logdir'] = logdir
        if number_of_samples is not None and batch_size is not None:
          train_step_kwargs['num_batches_per_epoch'] = int(
              number_of_samples / float(batch_size))
        # BUG FIX: this used to be guarded on number_of_samples/batch_size,
        # which allowed a TypeError when number_of_steps or number_of_epochs
        # was None. Guard on the values actually used.
        if number_of_steps is not None and number_of_epochs is not None:
          train_step_kwargs['num_steps_per_epoch'] = int(
              number_of_steps / float(number_of_epochs))

    # If metric calculation is desired.
    if metric_op is not None:
      # The reset_op re-initializes the streaming variables (streaming
      # accuracy, ...). It must be defined here, before the supervisor
      # finalizes the graph. Calling reset_op in the train_step function after
      # each epoch zeroes the total & count variables, giving a per-epoch
      # averaged metric.
      # CONSISTENCY FIX: use the tf_variables module alias like the rest of
      # this function, instead of a bare `tf` name.
      stream_vars = [
          i for i in tf_variables.local_variables()
          if i.name.split('/')[1] == metric_collection_name
      ]
      reset_op = tf_variables.variables_initializer(stream_vars)

    sv = supervisor.Supervisor(
        graph=graph,
        is_chief=is_chief,
        logdir=logdir,
        init_op=init_op,
        init_feed_dict=init_feed_dict,
        local_init_op=local_init_op,
        ready_for_local_init_op=ready_for_local_init_op,
        ready_op=ready_op,
        summary_op=summary_op,
        summary_writer=summary_writer,
        global_step=global_step,
        saver=saver,
        save_summaries_secs=save_summaries_secs,
        save_model_secs=save_interval_secs,
        init_fn=init_fn)

    if summary_writer is not None:
      train_step_kwargs['summary_writer'] = sv.summary_writer

    # BUG FIX: initialize so `return total_loss` is well-defined even when no
    # train step executes (e.g. should_stop is already True).
    total_loss = None
    should_retry = True
    while should_retry:
      try:
        should_retry = False
        with sv.managed_session(
            master, start_standard_services=False,
            config=session_config) as sess:
          logging.info('Starting Session.')
          if is_chief:
            if logdir:
              sv.start_standard_services(sess)
          elif startup_delay_steps > 0:
            # BUG FIX: sys.maxint does not exist in Python 3.
            _wait_for_step(
                sess, global_step,
                min(startup_delay_steps, number_of_steps or sys.maxsize))
          sv.start_queue_runners(sess)
          logging.info('Starting Queues.')
          if is_chief and sync_optimizer is not None:
            sv.start_queue_runners(sess, [chief_queue_runner])
            sess.run(init_tokens_op)
          try:
            while not sv.should_stop():
              if metric_op is not None:
                total_loss, should_stop = train_step_fn(
                    sess, train_op, global_step, train_step_kwargs, metric_op,
                    reset_op)
              else:
                total_loss, should_stop = train_step_fn(
                    sess, train_op, global_step, train_step_kwargs)
              if should_stop:
                logging.info('Stopping Training.')
                break
          except errors.OutOfRangeError:
            # OutOfRangeError is thrown when epoch limit per
            # tf.train.limit_epochs is reached.
            logging.info('Caught OutOfRangeError. Stopping Training.')
          if logdir and sv.is_chief:
            logging.info('Finished training! Saving model to disk.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
      except errors.AbortedError:
        # Always re-run on AbortedError as it indicates a restart of one of
        # the distributed tensorflow servers.
        logging.info('Retrying training!')
        should_retry = True

  return total_loss
def mod():
  """Builds `1.0 % 1.0` with both constants explicitly placed on GPU:0."""
  with ops.device('/device:GPU:0'):
    numerator = constant_op.constant(1.0)
    denominator = constant_op.constant(1.0)
    return math_ops.mod(numerator, denominator)
def atrous_conv2d(value, filters, rate, padding, name=None):
  """Atrous convolution (a.k.a. convolution with holes or dilated convolution).

  Computes a 2-D atrous convolution, also known as convolution with holes or
  dilated convolution, given 4-D `value` and `filters` tensors. If the `rate`
  parameter is equal to one, it performs regular 2-D convolution. If the `rate`
  parameter is greater than one, it performs convolution with holes, sampling
  the input values every `rate` pixels in the `height` and `width` dimensions.
  This is equivalent to convolving the input with a set of upsampled filters,
  produced by inserting `rate - 1` zeros between two consecutive values of the
  filters along the `height` and `width` dimensions, hence the name atrous
  convolution or convolution with holes (the French word trous means holes in
  English).

  More specifically:

      output[b, i, j, k] = sum_{di, dj, q} filters[di, dj, q, k] *
          value[b, i + rate * di, j + rate * dj, q]

  Atrous convolution allows us to explicitly control how densely to compute
  feature responses in fully convolutional networks. Used in conjunction with
  bilinear interpolation, it offers an alternative to `conv2d_transpose` in
  dense prediction tasks such as semantic image segmentation, optical flow
  computation, or depth estimation. It also allows us to effectively enlarge
  the field of view of filters without increasing the number of parameters or
  the amount of computation.

  For a description of atrous convolution and how it can be used for dense
  feature extraction, please see: [Semantic Image Segmentation with Deep
  Convolutional Nets and Fully Connected CRFs](http://arxiv.org/abs/1412.7062).
  The same operation is investigated further in [Multi-Scale Context
  Aggregation by Dilated Convolutions](http://arxiv.org/abs/1511.07122).
  Previous works that effectively use atrous convolution in different ways are,
  among others, [OverFeat: Integrated Recognition, Localization and Detection
  using Convolutional Networks](http://arxiv.org/abs/1312.6229) and [Fast Image
  Scanning with Deep Max-Pooling Convolutional Neural Networks]
  (http://arxiv.org/abs/1302.1700). Atrous convolution is also closely related
  to the so-called noble identities in multi-rate signal processing.

  There are many different ways to implement atrous convolution (see the refs
  above). The implementation here reduces

      atrous_conv2d(value, filters, rate, padding=padding)

  to the following three operations:

      paddings = ...
      net = space_to_batch(value, paddings, block_size=rate)
      net = conv2d(net, filters, strides=[1, 1, 1, 1], padding="VALID")
      crops = ...
      net = batch_to_space(net, crops, block_size=rate)

  Advanced usage. Note the following optimization: A sequence of `atrous_conv2d`
  operations with identical `rate` parameters, 'SAME' `padding`, and filters
  with odd heights/ widths:

      net = atrous_conv2d(net, filters1, rate, padding="SAME")
      net = atrous_conv2d(net, filters2, rate, padding="SAME")
      ...
      net = atrous_conv2d(net, filtersK, rate, padding="SAME")

  can be equivalently performed cheaper in terms of computation and memory as:

      pad = ...  # padding so that the input dims are multiples of rate
      net = space_to_batch(net, paddings=pad, block_size=rate)
      net = conv2d(net, filters1, strides=[1, 1, 1, 1], padding="SAME")
      net = conv2d(net, filters2, strides=[1, 1, 1, 1], padding="SAME")
      ...
      net = conv2d(net, filtersK, strides=[1, 1, 1, 1], padding="SAME")
      net = batch_to_space(net, crops=pad, block_size=rate)

  because a pair of consecutive `space_to_batch` and `batch_to_space` ops with
  the same `block_size` cancel out when their respective `paddings` and `crops`
  inputs are identical.

  Args:
    value: A 4-D `Tensor` of type `float`. It needs to be in the default "NHWC"
      format. Its shape is `[batch, in_height, in_width, in_channels]`.
    filters: A 4-D `Tensor` with the same type as `value` and shape
      `[filter_height, filter_width, in_channels, out_channels]`. `filters`'
      `in_channels` dimension must match that of `value`. Atrous convolution is
      equivalent to standard convolution with upsampled filters with effective
      height `filter_height + (filter_height - 1) * (rate - 1)` and effective
      width `filter_width + (filter_width - 1) * (rate - 1)`, produced by
      inserting `rate - 1` zeros along consecutive elements across the
      `filters`' spatial dimensions.
    rate: A positive int32. The stride with which we sample input values across
      the `height` and `width` dimensions. Equivalently, the rate by which we
      upsample the filter values by inserting zeros across the `height` and
      `width` dimensions. In the literature, the same parameter is sometimes
      called `input stride` or `dilation`.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
    name: Optional name for the returned tensor.

  Returns:
    A `Tensor` with the same type as `value`.

  Raises:
    ValueError: If input/output depth does not match `filters`' shape, or if
      padding is other than `'VALID'` or `'SAME'`.
  """
  with ops.op_scope([value, filters], name, "atrous_conv2d") as name:
    value = ops.convert_to_tensor(value, name="value")
    filters = ops.convert_to_tensor(filters, name="filters")
    value_shape = value.get_shape()
    filter_shape = filters.get_shape()
    if not value_shape[3].is_compatible_with(filter_shape[2]):
      raise ValueError(
          "value's input channels does not match filters' input channels, "
          "{} != {}".format(value_shape[3], filter_shape[2]))
    if rate < 1:
      raise ValueError("rate {} cannot be less than one".format(rate))

    # rate == 1 degenerates to an ordinary convolution; skip the
    # space_to_batch/batch_to_space machinery entirely.
    if rate == 1:
      value = gen_nn_ops.conv2d(input=value,
                                filter=filters,
                                strides=[1, 1, 1, 1],
                                padding=padding)
      return value

    # We have two padding contributions. The first is used for converting
    # "SAME" to "VALID". The second is required so that the height and width
    # of the zero-padded value tensor are multiples of rate.

    # Spatial dimensions of original input. Note: from here on, value_shape is
    # the dynamic shape tensor (array_ops.shape), not the static TensorShape.
    value_shape = array_ops.shape(value)
    in_height = value_shape[1]
    in_width = value_shape[2]

    # Spatial dimensions of the filters and the upsampled filters in which we
    # introduce (rate - 1) zeros between consecutive filter values. The filter
    # spatial dims must be statically known here (int() on static shape).
    filter_height = int(filter_shape[0])
    filter_width = int(filter_shape[1])
    filter_height_up = filter_height + (filter_height - 1) * (rate - 1)
    filter_width_up = filter_width + (filter_width - 1) * (rate - 1)

    # Padding required to reduce to "VALID" convolution
    if padding == "SAME":
      pad_height = filter_height_up - 1
      pad_width = filter_width_up - 1
    elif padding == "VALID":
      pad_height = 0
      pad_width = 0
    else:
      raise ValueError("Invalid padding")

    # When padding is "SAME" and the pad_height (pad_width) is odd, we pad more
    # to bottom (right), following the same convention as conv2d().
    pad_top = math_ops.floordiv(pad_height, 2)
    pad_bottom = pad_height - pad_top
    pad_left = math_ops.floordiv(pad_width, 2)
    pad_right = pad_width - pad_left

    # More padding so that rate divides the height and width of the input value
    in_height = in_height + pad_top + pad_bottom
    in_width = in_width + pad_left + pad_right

    mod_height = math_ops.mod(in_height, rate)
    mod_width = math_ops.mod(in_width, rate)
    null = constant_op.constant(0)
    # Extra bottom/right padding is only needed when rate does not already
    # divide the padded height/width.
    pad_bottom_extra = control_flow_ops.cond(
        gen_math_ops.equal(mod_height, 0), lambda: null,
        lambda: rate - mod_height)
    pad_right_extra = control_flow_ops.cond(
        gen_math_ops.equal(mod_width, 0), lambda: null,
        lambda: rate - mod_width)

    # The paddings argument to space_to_batch includes both padding components
    pad_bottom = pad_bottom + pad_bottom_extra
    pad_right = pad_right + pad_right_extra
    space_to_batch_pad = [[pad_top, pad_bottom], [pad_left, pad_right]]

    value = array_ops.space_to_batch(input=value,
                                     paddings=space_to_batch_pad,
                                     block_size=rate)

    value = gen_nn_ops.conv2d(input=value,
                              filter=filters,
                              strides=[1, 1, 1, 1],
                              padding="VALID",
                              name=name)

    # The crops argument to batch_to_space is just the extra padding component
    batch_to_space_crop = [[0, pad_bottom_extra], [0, pad_right_extra]]

    value = array_ops.batch_to_space(input=value,
                                     crops=batch_to_space_crop,
                                     block_size=rate)

    return value
def rotate_transpose(x, shift, name="rotate_transpose"):
  """Circularly moves dims left or right.

  Effectively identical to:

  ```python
  numpy.transpose(x, numpy.roll(numpy.arange(len(x.shape)), shift))
  ```

  When `shift` (or the rank of `x`) cannot be resolved statically, additional
  graph-runtime ops are built to compute the permutation; this can entail
  moving data from GPU to CPU.

  Example:

  ```python
  x = ... # Tensor of shape [1, 2, 3, 4].
  rotate_transpose(x, -1) # result shape: [2, 3, 4, 1]
  rotate_transpose(x, -2) # result shape: [3, 4, 1, 2]
  rotate_transpose(x,  1) # result shape: [4, 1, 2, 3]
  rotate_transpose(x,  2) # result shape: [3, 4, 1, 2]
  rotate_transpose(x, 7) == rotate_transpose(x, 3)
  rotate_transpose(x, -7) == rotate_transpose(x, -3)
  ```

  Args:
    x: `Tensor`.
    shift: `Tensor`. Number of dimensions to transpose left (shift<0) or
      transpose right (shift>0).
    name: Python `str`. The name to give this op.

  Returns:
    rotated_x: Input `Tensor` with dimensions circularly rotated by shift.

  Raises:
    TypeError: if shift is not integer type.
  """
  with ops.name_scope(name, values=[x, shift]):
    x = ops.convert_to_tensor(x, name="x")
    shift = ops.convert_to_tensor(shift, name="shift")
    # We do not assign back to preserve constant-ness.
    check_ops.assert_integer(shift)
    shift_value_static = tensor_util.constant_value(shift)
    ndims = x.get_shape().ndims
    if ndims is not None and shift_value_static is not None:
      # Fast path: both rank and shift are known at graph-construction time,
      # so the permutation is computed with numpy and no extra ops are built.
      if ndims < 2:
        return x
      # Reduce |shift| modulo ndims while preserving its sign, so shifts
      # larger than the rank wrap around (see the 7 == 3 example above).
      shift_value_static = np.sign(shift_value_static) * (
          abs(shift_value_static) % ndims)
      if shift_value_static == 0:
        return x
      perm = np.roll(np.arange(ndims), shift_value_static)
      return array_ops.transpose(x, perm=perm)
    else:
      # Consider if we always had a positive shift, and some specified
      # direction.
      # When shifting left we want the new array:
      #   last(x, n-shift) + first(x, shift)
      # and if shifting right then we want:
      #   last(x, shift) + first(x, n-shift)
      # Observe that last(a) == slice(a, n) and first(a) == slice(0, a).
      # Also, we can encode direction and shift as one: direction * shift.
      # Combining these facts, we have:
      #   a = cond(shift<0, -shift, n-shift)
      #   last(x, n-a) + first(x, a) == x[a:n] + x[0:a]
      # Finally, we transform shift by modulo length so it can be specified
      # independently from the array upon which it operates (like python).
      ndims = array_ops.rank(x)
      shift = array_ops.where(math_ops.less(shift, 0),
                              math_ops.mod(-shift, ndims),
                              ndims - math_ops.mod(shift, ndims))
      first = math_ops.range(0, shift)
      last = math_ops.range(shift, ndims)
      perm = array_ops.concat([last, first], 0)
      return array_ops.transpose(x, perm=perm)
def main(_):
  """Builds the training graph (with in-graph validation) and runs training.

  Reads all configuration from FLAGS: dataset selection, network/initializer
  choice, preprocessing, multi-clone deployment, optimizer, and the custom
  train-step schedule (logging / validation / metric-reset cadence).

  Raises:
    ValueError: if --dataset_dir is missing, or an unknown weights/biases
      initializer name is supplied.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
    dataset_val = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, 'validation', FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################
    if FLAGS.weights_initializer is None:
      # Default value will be defined in argscope, it is xavier_initializer.
      weights_initializer = None
    elif FLAGS.weights_initializer == 'zeros':
      weights_initializer = tf.zeros_initializer()
    elif FLAGS.weights_initializer == 'ones':
      weights_initializer = tf.ones_initializer()
    elif FLAGS.weights_initializer == 'trunc_normal':
      weights_initializer = tf.truncated_normal_initializer()
    elif FLAGS.weights_initializer == 'xavier':
      weights_initializer = initializers.xavier_initializer()
    elif FLAGS.weights_initializer == 'var_scaling':
      weights_initializer = initializers.variance_scaling_initializer()
    else:
      raise ValueError('weight initializer not found')

    if FLAGS.biases_initializer is None:
      # Default value will be defined in argscope, it is zeros_initializer.
      biases_initializer = None
    # BUG FIX: this branch used to test the not-yet-assigned local
    # `biases_initializer` instead of the flag, raising UnboundLocalError
    # whenever the flag was set to a string.
    elif FLAGS.biases_initializer == 'zeros':
      biases_initializer = tf.zeros_initializer()
    elif FLAGS.biases_initializer == 'ones':
      biases_initializer = tf.ones_initializer()
    elif FLAGS.biases_initializer == 'trunc_normal':
      biases_initializer = tf.truncated_normal_initializer()
    elif FLAGS.biases_initializer == 'xavier':
      biases_initializer = initializers.xavier_initializer()
    elif FLAGS.biases_initializer == 'var_scaling':
      biases_initializer = initializers.variance_scaling_initializer()
    else:
      raise ValueError('biases initializer not found')

    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        weights_initializer=weights_initializer,
        biases_initializer=biases_initializer,
        is_training=True)

    # Same network, reused (is_training=False) for in-graph validation.
    network_fn_val = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weights_initializer=weights_initializer,
        biases_initializer=biases_initializer,
        is_training=False)

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name,
        is_training=True,
        flipLR=FLAGS.flipLR,
        random_scale=FLAGS.random_scale,
        is_windowed=FLAGS.is_windowed)
    image_preprocessing_fn_val = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name,
        is_training=False,
        flipLR=FLAGS.flipLR,
        random_scale=FLAGS.random_scale,
        is_windowed=FLAGS.is_windowed)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size, train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)

      ############################################
      # Create a provider for the validation set #
      ############################################
      provider_val = slim.dataset_data_provider.DatasetDataProvider(
          dataset_val,
          shuffle=True,
          common_queue_capacity=2 * FLAGS.batch_size_val,
          common_queue_min=FLAGS.batch_size_val)
      [image_val, label_val] = provider_val.get(['image', 'label'])
      label_val -= FLAGS.labels_offset

      eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

      image_val = image_preprocessing_fn_val(image_val, eval_image_size,
                                             eval_image_size)

      images_val, labels_val = tf.train.batch(
          [image_val, label_val],
          batch_size=FLAGS.batch_size_val,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size_val)
      labels_val_onehot = slim.one_hot_encoding(
          labels_val, dataset.num_classes - FLAGS.labels_offset)

    ###############################
    # Define the model (training) #
    ###############################
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      with tf.variable_scope('my_scope'):
        logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'],
            labels,
            label_smoothing=FLAGS.label_smoothing,
            weights=0.4,
            scope='aux_loss')
      tf.losses.softmax_cross_entropy(
          labels, logits, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      # Adding in a picture of the activations at each layer; this is a good
      # way to double check that the rotated images look rotated to our eyes.
      if 'conv' in end_point:
        dims = x.get_shape()
        for ii in range(5):
          summaries.add(
              tf.summary.image(
                  'image_out/' + end_point + '/image_' + str(ii),
                  tf.slice(x, [ii, 0, 0, 0], [1, dims[1], dims[2], 1])))
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(
          tf.summary.scalar('sparsity/' + end_point, tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    if FLAGS.quantize_delay >= 0:
      tf.contrib.quantize.create_training_graph(
          quant_delay=FLAGS.quantize_delay)

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    # and returns a train_tensor and summary_op
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones, optimizer, var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(
        clones_gradients, global_step=global_step)
    update_ops.append(grad_updates)

    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    #################################
    # Define the model (validation) #
    #################################
    # Get the validation set logits (predictions), reusing training weights.
    with tf.variable_scope('my_scope', reuse=True):
      logits_val, _ = network_fn_val(images_val)
    predictions_val = tf.argmax(logits_val, 1)

    # Define loss on validation set, add a summary
    tf.losses.softmax_cross_entropy(
        labels_val_onehot,
        logits_val,
        label_smoothing=FLAGS.label_smoothing,
        weights=1.0,
        loss_collection='eval_losses')
    for loss in tf.get_collection('eval_losses'):
      summaries.add(tf.summary.scalar('eval_losses/%s' % loss.op.name, loss))

    # Define the validation set metrics:
    # Will define each metric twice as separate operation.
    # One set will be made resettable, the other set will be streaming.
    with tf.name_scope('eval_metrics'):
      eval_acc_value, eval_acc_op = tf.metrics.accuracy(
          predictions=predictions_val, labels=labels_val)
      eval_recall_5_value, eval_recall_5_op = (
          slim.metrics.streaming_recall_at_k(
              predictions=logits_val, labels=labels_val, k=5))
      # add these variables as summaries for tensorboard
      summaries.add(tf.summary.scalar('eval_recall_5', eval_recall_5_value))
      summaries.add(tf.summary.scalar('eval_acc', eval_acc_value))

    with tf.name_scope('eval_metrics_streaming'):
      eval_acc_streaming_value, eval_acc_streaming_op = tf.metrics.accuracy(
          predictions=predictions_val, labels=labels_val)
      eval_recall_5_streaming_value, eval_recall_5_streaming_op = (
          slim.metrics.streaming_recall_at_k(
              predictions=logits_val, labels=labels_val, k=5))
      # add these variables as summaries for tensorboard
      summaries.add(
          tf.summary.scalar('eval_recall_5_streaming',
                            eval_recall_5_streaming_value))
      summaries.add(
          tf.summary.scalar('eval_acc_streaming', eval_acc_streaming_value))

    # Also add summaries of all the local variables used to compute the eval
    # metrics. BUG FIX: get_collection filters scope with re.match, so the
    # bare prefix 'eval_metrics' also matched 'eval_metrics_streaming/...',
    # and the second loop's scope 'eval_streaming_metrics' matched nothing.
    # Use a trailing slash for the first scope and the real name scope for
    # the second so the two loops are effective and disjoint.
    for metric in tf.get_collection(tf.GraphKeys.METRIC_VARIABLES,
                                    'eval_metrics/'):
      summaries.add(tf.summary.scalar('%s' % metric.op.name, metric))
    for metric in tf.get_collection(tf.GraphKeys.METRIC_VARIABLES,
                                    'eval_metrics_streaming'):
      summaries.add(tf.summary.scalar('%s' % metric.op.name, metric))

    # Gather up all the variables that are used to compute eval metrics
    # (only the resettable set; streaming vars live under
    # 'eval_metrics_streaming' and are deliberately excluded).
    stream_vars = [
        i for i in tf.local_variables()
        if i.name.split('/')[0] == 'eval_metrics'
    ]
    # Make an operation that'll let us re-initialize just these vars.
    # (tf.initialize_variables is deprecated; variables_initializer is the
    # same op under its current name.)
    reset_op = tf.variables_initializer(stream_vars)

    # Make an operation that'll let us run evaluation (all metrics)
    eval_op = list([
        eval_acc_op, eval_recall_5_op, eval_acc_streaming_op,
        eval_recall_5_streaming_op
    ])

    # Gather validation summaries
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Merge all summaries together (this includes training summaries too).
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    # Create a non-default saver so we don't delete all the old checkpoints.
    my_saver = tf_saver.Saver(
        max_to_keep=FLAGS.max_checkpoints_to_keep,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
    )

    # Create a non-default dictionary of options for train_step_fn.
    # This is a hack that lets us pass everything we need to run evaluation,
    # into the training loop function.
    with ops.name_scope('train_step'):
      train_step_kwargs = {}

      if FLAGS.max_number_of_steps:
        should_stop_op = math_ops.greater_equal(global_step,
                                                FLAGS.max_number_of_steps)
      else:
        should_stop_op = constant_op.constant(False)
      train_step_kwargs['should_stop'] = should_stop_op
      if FLAGS.log_every_n_steps > 0:
        train_step_kwargs['should_log'] = math_ops.equal(
            math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)
      train_step_kwargs['should_val'] = math_ops.equal(
          math_ops.mod(global_step, FLAGS.val_every_n_steps), 0)
      train_step_kwargs['should_reset_eval_metrics'] = math_ops.equal(
          math_ops.mod(
              global_step,
              tf.to_int64(
                  math_ops.multiply(FLAGS.reset_eval_metrics_every_n_vals,
                                    FLAGS.val_every_n_steps))), 0)
      train_step_kwargs['eval_op'] = eval_op
      train_step_kwargs['reset_op'] = reset_op

    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None,
        saver=my_saver,
        train_step_fn=learning_biasCNN.train_step_fn,
        train_step_kwargs=train_step_kwargs)
def defaults_two():
  """Dispatch on the parity of `x`: run `multiply` when even, else `divide`."""
  x_is_even = math_ops.equal(math_ops.mod(x, 2), 0)
  return control_flow_ops.cond(x_is_even, multiply, divide, name="cond_mult")
def _build_filter_range_graph(self, div):
  """Build a Dataset over [0, 100) that drops elements where x % div == 2."""
  def _keep(x):
    # Keep every element except those congruent to 2 modulo `div`.
    return math_ops.not_equal(math_ops.mod(x, div), 2)

  return dataset_ops.Dataset.range(100).filter(_keep)
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight,
                      coverage_penalty_weight, constrained_matrix):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search. An instance of
      `ConstrainedBeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int. The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
    coverage_penalty_weight: Float weight forwarded to the coverage-penalty
      scoring (used via `get_attention_probs` / `_get_scores`).
    constrained_matrix: Extra tensor forwarded to `_get_scores` to constrain
      beam scores (the direct logit masking below is commented out).

  Returns:
    A `(ConstrainedBeamSearchDecoderOutput, ConstrainedBeamSearchDecoderState)`
    pair for this step.
  """
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished
  not_finished = tf.logical_not(previously_finished)

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  #Amanda: Add Constrained
  #logits = tf.multiply(logits, constrained_matrix)
  #logits += constrained_matrix
  ##################################################
  step_log_probs = nn_ops.log_softmax(logits)
  # Finished beams only propagate an EOS continuation (others masked to -inf).
  step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished)
  total_probs = array_ops.expand_dims(
      beam_state.log_probs, 2) + step_log_probs
  # For end beam: set to [-inf,-inf,-inf,-inf], else: set to sum of
  # historical log prob

  # Calculate the continuation lengths by adding to all continuing beams.
  # Static vocab size when known, else the dynamic last dimension of logits.
  vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
  #lengths_to_add = array_ops.one_hot(
  #    indices=array_ops.tile(
  #        array_ops.reshape(end_token, [1, 1]), [batch_size, beam_width]),
  #    depth=vocab_size,
  #    on_value=constant_op.constant(0, dtype=dtypes.int64),
  #    off_value=constant_op.constant(1, dtype=dtypes.int64),
  #    dtype=dtypes.int64)
  # 1 for every continuation token, 0 for the EOS column.
  lengths_to_add = array_ops.one_hot(indices=array_ops.fill(
      [batch_size, beam_width], end_token),
                                     depth=vocab_size,
                                     on_value=np.int64(0),
                                     off_value=np.int64(1),
                                     dtype=tf.int64)
  add_mask = math_ops.to_int64(not_finished)
  #add_mask = (1 - math_ops.to_int64(previously_finished))
  # Zero out length increments for beams that had already finished.
  lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add
  new_prediction_lengths = (lengths_to_add +
                            array_ops.expand_dims(prediction_lengths, 2))

  #Amanda:Copy coverage penalty factor
  accumulated_attention_probs = None
  attention_probs = get_attention_probs(next_cell_state,
                                        coverage_penalty_weight)
  if attention_probs is not None:
    # Only accumulate attention mass for beams that are still alive.
    attention_probs *= tf.expand_dims(tf.to_float(not_finished), 2)
    accumulated_attention_probs = (beam_state.accumulated_attention_probs +
                                   attention_probs)

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight,
      coverage_penalty_weight=coverage_penalty_weight,
      finished=previously_finished,
      accumulated_attention_probs=accumulated_attention_probs,
      constrained_matrix=constrained_matrix)

  time = ops.convert_to_tensor(time, name="time")
  # During the first time step we only consider the initial beam
  scores_shape = array_ops.shape(scores)
  scores_flat = control_flow_ops.cond(
      time > 0,
      lambda: array_ops.reshape(scores, [batch_size, -1]),
      lambda: scores[:, 0])
  num_available_beam = control_flow_ops.cond(
      time > 0,
      lambda: math_ops.reduce_prod(scores_shape[1:]),
      lambda: math_ops.reduce_prod(scores_shape[2:]))

  # Pick the next beams according to the specified successors function
  next_beam_size = math_ops.minimum(
      ops.convert_to_tensor(beam_width, dtype=dtypes.int32,
                            name="beam_width"), num_available_beam)
  next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size)
  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen
  # predictions
  next_beam_probs = _tensor_gather_helper(gather_indices=word_indices,
                                          gather_from=total_probs,
                                          batch_size=batch_size,
                                          range_size=beam_width * vocab_size,
                                          gather_shape=[-1],
                                          name="next_beam_probs")
  # Note: just doing the following
  #   math_ops.to_int32(word_indices % vocab_size,
  #       name="next_beam_word_ids")
  # would be a lot cleaner but for reasons unclear, that hides the results of
  # the op which prevents capturing it with tfdbg debug ops.
  raw_next_word_ids = math_ops.mod(word_indices, vocab_size,
                                   name="next_beam_word_ids")
  next_word_ids = math_ops.to_int32(raw_next_word_ids)
  # Integer division recovers the parent beam index from the flat index.
  next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                    name="next_beam_parent_ids")

  # Append new ids to current predictions
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_finished = math_ops.logical_or(previously_finished,
                                      math_ops.equal(next_word_ids,
                                                     end_token),
                                      name="next_beam_finished")

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged
  # 2. Beams that are now finished (EOS predicted) remain unchanged
  # 3. Beams that are not yet finished have their length increased by 1
  lengths_to_add = math_ops.to_int64(
      math_ops.not_equal(next_word_ids, end_token))
  lengths_to_add = (1 - math_ops.to_int64(next_finished)) * lengths_to_add
  next_prediction_len = _tensor_gather_helper(gather_indices=next_beam_ids,
                                              gather_from=beam_state.lengths,
                                              batch_size=batch_size,
                                              range_size=beam_width,
                                              gather_shape=[-1])
  next_prediction_len += lengths_to_add
  next_accumulated_attention_probs = ()
  if accumulated_attention_probs is not None:
    next_accumulated_attention_probs = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=accumulated_attention_probs,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[batch_size * beam_width, -1],
        name="next_accumulated_attention_probs")

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  # pylint: disable=g-long-lambda
  next_cell_state = nest.map_structure(
      lambda gather_from: _maybe_tensor_gather_helper(
          gather_indices=next_beam_ids,
          gather_from=gather_from,
          batch_size=batch_size,
          range_size=beam_width,
          gather_shape=[batch_size * beam_width, -1]),
      next_cell_state)
  # pylint: enable=g-long-lambda

  next_state = ConstrainedBeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished,
      accumulated_attention_probs=next_accumulated_attention_probs)

  # NOTE(review): the output deliberately reports raw log-probs, not the
  # penalized top-k scores (see the commented-out alternatives).
  output = ConstrainedBeamSearchDecoderOutput(
      #scores=next_beam_scores,
      scores=next_beam_probs,
      #scores = constrained_matrix,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This contains most of the synchronization implementation.

  Args:
    grads_and_vars: List of (local_vars, gradients) pairs.
    global_step: Variable to increment by one after the variables have been
      updated. We need it to check staleness.
    name: Optional name for the returned operation. Default to the name
      passed to the Optimizer constructor.

  Returns:
    train_op: The op to dequeue a token so the replicas can exit this batch
      and apply averages to local vars or an op to update vars locally.

  Raises:
    ValueError: If the grads_and_vars is empty.
    ValueError: If global step is not provided, the staleness cannot be
      checked.
  """
  if not grads_and_vars:
    raise ValueError("Must supply at least one variable")
  if global_step is None:
    raise ValueError("Global step is required")

  # Generate copy of all trainable variables
  self._generate_shared_variables()

  # Wraps the apply_gradients op of the parent optimizer
  apply_updates = self._opt.apply_gradients(grads_and_vars, global_step)

  # This function will be called whenever the global_step divides interval
  # steps
  def _apply_averages():  # pylint: disable=missing-docstring
    # Collect local and global vars
    local_vars = [v for g, v in grads_and_vars if g is not None]
    global_vars = ops.get_collection_ref("global_model")
    # sync queue, place it in the ps
    with ops.colocate_with(self._global_step):
      sync_queue = data_flow_ops.FIFOQueue(-1, [dtypes.bool], shapes=[[]],
                                           shared_name="sync_queue")
    train_ops = []
    aggregated_vars = []
    with ops.name_scope(None, self._name + "/global"):
      for var, gvar in zip(local_vars, global_vars):
        # pylint: disable=protected-access
        # Get reference to the tensor, this works with Variable and
        # ResourceVariable
        var = ops.convert_to_tensor(var)
        # Place the accumulator in the same ps as the corresponding
        # global_var
        with ops.device(gvar.device):
          var_accum = data_flow_ops.ConditionalAccumulator(
              var.dtype,
              shape=var.get_shape(),
              shared_name=gvar.name + "/var_accum")
          # Add op to push local_var to accumulator
          train_ops.append(
              var_accum.apply_grad(var, local_step=global_step))
          # Op to average the vars in the accumulator
          aggregated_vars.append(
              var_accum.take_grad(self._replicas_to_aggregate))
          # Remember accumulator and corresponding device
          self._accumulator_list.append((var_accum, gvar.device))

    # chief worker updates global vars and enqueues tokens to the sync queue
    if self._is_chief:
      update_ops = []
      # Make sure train_ops are run
      with ops.control_dependencies(train_ops):
        # Update global_vars with average values
        for avg_var, gvar in zip(aggregated_vars, global_vars):
          with ops.device(gvar.device):
            update_ops.append(state_ops.assign(gvar, avg_var))
        # Update shared global_step
        with ops.device(global_step.device):
          update_ops.append(
              state_ops.assign_add(self._global_step, 1))
      # After averaging, push tokens to the queue
      with ops.control_dependencies(update_ops), ops.device(
          global_step.device):
        tokens = array_ops.fill([self._tokens_per_step],
                                constant_op.constant(False))
        sync_op = sync_queue.enqueue_many(tokens)
    # non chief workers deque a token, they will block here until chief is
    # done
    else:
      # Make sure train_ops are run
      with ops.control_dependencies(train_ops), ops.device(
          global_step.device):
        sync_op = sync_queue.dequeue()

    # All workers pull averaged values
    with ops.control_dependencies([sync_op]):
      local_update_op = self._assign_vars(local_vars, global_vars)
    return local_update_op

  # Check if we should push and average or not
  with ops.control_dependencies([apply_updates]):
    condition = math_ops.equal(
        math_ops.mod(global_step, self._interval_steps), 0)
    conditional_update = control_flow_ops.cond(condition, _apply_averages,
                                               control_flow_ops.no_op)

  chief_init_ops = []
  # Initialize accumulators, ops placed in ps
  for accum, dev in self._accumulator_list:
    with ops.device(dev):
      chief_init_ops.append(
          accum.set_global_step(global_step, name="SetGlobalStep"))
  self._chief_init_op = control_flow_ops.group(*(chief_init_ops))

  return conditional_update
def _apply_gradient(self, grad, var, indices=None):
  """The main function to update a variable.

  Args:
    grad: A Tensor containing gradient to apply.
    var: A Tensor containing the variable to update.
    indices: An array of integers, for sparse update.

  Returns:
    Updated variable var = var - learning_rate * preconditioner * grad

  If the gradient is dense, var and grad have the same shape.
  If the update is sparse, then the first dimension of the gradient and var
  may differ, others are all the same. In this case the indices array
  provides the set of indices of the variable which are to be updated with
  each row of the gradient.
  """
  global_step = self._global_step + 1

  # Update accumulated weighted average of gradients
  gbar = self.get_slot(var, "gbar")
  gbar_decay_t = GetParam(self._gbar_decay, global_step)
  gbar_weight_t = GetParam(self._gbar_weight, global_step)
  if indices is not None:
    # Note - the sparse update is not easily implemented, since the
    # algorithm needs all indices of gbar to be updated
    # if mat_gbar_decay != 1 or mat_gbar_decay != 0.
    # One way to make mat_gbar_decay = 1 is by rescaling.
    # If we want the update:
    #         G_{t+1} = a_{t+1} G_t + b_{t+1} w_t
    # define:
    #         r_{t+1} = a_{t+1} * r_t
    #         h_t = G_t / r_t
    # Then:
    #         h_{t+1} = h_t + (b_{t+1} / r_{t+1}) * w_t
    # So we get the mat_gbar_decay = 1 as desired.
    # We can implement this in a future version as needed.
    # However we still need gbar_decay = 0, otherwise all indices
    # of the variable will need to be updated.
    if self._gbar_decay != 0.0:
      tf_logging.warning("Not applying momentum for variable: %s" % var.name)
    gbar_updated = grad
  else:
    gbar_updated = self._weighted_average(gbar, self._gbar_decay,
                                          gbar_decay_t,
                                          gbar_weight_t * grad)

  # Update the preconditioners and compute the preconditioned gradient
  shape = var.get_shape()
  mat_g_list = []
  for i in range(len(shape)):
    # One preconditioner statistic per tensor dimension.
    mat_g_list.append(self.get_slot(var, "Gbar_" + str(i)))
  mat_gbar_decay_t = GetParam(self._mat_gbar_decay, global_step)
  mat_gbar_weight_t = GetParam(self._mat_gbar_weight, global_step)

  preconditioned_grad = gbar_updated
  v_rank = len(mat_g_list)
  neg_alpha = - GetParam(self._alpha, global_step) / v_rank
  svd_interval = GetParam(self._svd_interval, global_step)
  precond_update_interval = GetParam(self._precond_update_interval,
                                     global_step)
  for i, mat_g in enumerate(mat_g_list):
    # axes is the list of indices to reduce - everything but the current i.
    axes = list(range(i)) + list(range(i+1, v_rank))
    if shape[i] <= self._max_matrix_size:
      # If the tensor size is sufficiently small perform full Shampoo update
      # Note if precond_update_interval > 1 and mat_gbar_decay_t != 1, this
      # is not strictly correct. However we will use it for now, and
      # fix if needed. (G_1 = aG + bg ==> G_n = a^n G + (1+a+..+a^{n-1})bg)

      # pylint: disable=g-long-lambda,cell-var-from-loop
      mat_g_updated = control_flow_ops.cond(
          math_ops.mod(global_step, precond_update_interval) < 1,
          lambda: self._update_mat_g(
              mat_g, grad, axes, mat_gbar_decay_t,
              mat_gbar_weight_t * precond_update_interval, i),
          lambda: mat_g)

      if self._svd_interval == 1:
        mat_h = self._compute_power(var, mat_g_updated, shape[i], neg_alpha)
      else:
        # Recompute the matrix power only every svd_interval steps;
        # otherwise reuse the cached result from the "H_i" slot.
        mat_h = control_flow_ops.cond(
            math_ops.mod(global_step, svd_interval) < 1,
            lambda: self._compute_power(var, mat_g_updated, shape[i],
                                        neg_alpha, "H_" + str(i)),
            lambda: self.get_slot(var, "H_" + str(i)))

      # mat_h is a square matrix of size d_i x d_i
      # preconditioned_grad is a d_i x ... x d_n x d_0 x ... d_{i-1} tensor
      # After contraction with a d_i x d_i tensor
      # it becomes a d_{i+1} x ... x d_n x d_0 x ... d_i tensor
      # (the first dimension is contracted out, and the second dimension of
      # mat_h is appended).  After going through all the indices, it becomes
      # a d_0 x ... x d_n tensor again.
      preconditioned_grad = math_ops.tensordot(preconditioned_grad, mat_h,
                                               axes=([0], [0]),
                                               name="precond_" + str(i))
    else:
      # Tensor size is too large -- perform diagonal Shampoo update
      grad_outer = math_ops.reduce_sum(grad * grad, axis=axes)
      if i == 0 and indices is not None:
        assert self._mat_gbar_decay == 1.0
        mat_g_updated = state_ops.scatter_add(mat_g, indices,
                                              mat_gbar_weight_t * grad_outer)
        mat_h = math_ops.pow(
            array_ops.gather(mat_g_updated, indices) + self._epsilon,
            neg_alpha)
      else:
        mat_g_updated = self._weighted_average(mat_g,
                                               self._mat_gbar_decay,
                                               mat_gbar_decay_t,
                                               mat_gbar_weight_t * grad_outer)
        mat_h = math_ops.pow(mat_g_updated + self._epsilon, neg_alpha)

      # Need to do the transpose to ensure that the tensor becomes
      # a d_{i+1} x ... x d_n x d_0 x ... d_i tensor as described above.
      preconditioned_grad = array_ops.transpose(
          preconditioned_grad, perm=list(range(1, v_rank)) + [0]) * mat_h

  # Update the variable based on the Shampoo update
  learning_rate_t = GetParam(self._learning_rate, global_step)
  if indices is not None:
    var_updated = state_ops.scatter_add(
        var, indices, -learning_rate_t * preconditioned_grad)
  else:
    var_updated = state_ops.assign_sub(var,
                                       learning_rate_t * preconditioned_grad)
  return var_updated
def batch_execute(global_step, thunks, batch_size, name=None):
  """Executes a rotating subset of ops per global step.

  Given a list of thunks, each of which produces a single stateful op,
  ensures that exactly `batch_size` ops run per global step, scheduled in a
  round-robin fashion. For example, with 3 ops and batch_size 2:

    global_step | op0 | op1 | op2
    ------------+-----+-----+-----
    0           |  x  |  x  |
    1           |  x  |     |  x
    2           |     |  x  |  x
    3           |  x  |  x  |
    4           |  x  |     |  x

  Order of op execution within a single global step is not guaranteed.

  Args:
    global_step: Tensor indicating time. Determines which ops run.
    thunks: List of thunks. Each thunk encapsulates one op. Return values
      are ignored.
    batch_size: int. Number of ops to execute per global_step.
    name: string or None. Name scope for newly added ops.

  Returns:
    List of ops, one per thunk. Exactly `batch_size` of them have an effect
    every global step.
  """

  def _make_active_branch(thunk):
    """Branch that runs the thunk and returns an Op (not a Tensor)."""
    def _branch():
      with ops.control_dependencies([thunk()]):
        return control_flow_ops.no_op()
    return _branch

  def _make_idle_branch(_):
    """Branch that does nothing."""
    def _branch():
      return control_flow_ops.no_op()
    return _branch

  with ops.name_scope(name, "batch_execute"):
    active_branches = [_make_active_branch(thunk) for thunk in thunks]
    idle_branches = [_make_idle_branch(thunk) for thunk in thunks]
    num_thunks = len(thunks)
    scheduled = []
    for j in range(num_thunks):
      # Thunk j fires whenever its round-robin slot lands inside the window
      # of `batch_size` consecutive slots selected at this global step.
      slot = math_ops.mod(batch_size - 1 + global_step * batch_size - j,
                          num_thunks)
      fires = math_ops.less(slot, batch_size)
      scheduled.append(
          control_flow_ops.cond(fires, active_branches[j], idle_branches[j]))
    return scheduled