def _split_indexed_slices_v2(sp_input=None, num_split=None, dim_size=0, name=None):
    """Split `sp_input` into `num_split` pieces along the partitioned dimension.

    Each index in `sp_input.indices` is assigned to a partition; for every
    partition a masked copy of `sp_input` is built with
    `array_ops.sparse_mask` and its indices are rewritten to be relative to
    that partition.

    :param sp_input: object exposing `.indices` (and accepted by
        `array_ops.sparse_mask`) -- presumably an `IndexedSlices`; confirm
        against callers
    :param num_split: number of partitions (used with `range`, so an int)
    :param dim_size: size of the partitioned dimension
    :param name: name scope wrapping all created ops
    :return: list with one masked piece per partition
    """
    rows_per_part = dim_size // num_split
    leftover = dim_size % num_split
    with ops.name_scope(name):
        # When the partitioned dim cannot be divided by num_split, the
        # remainders are evenly assigned from the first partition to the last.
        assignments = math_ops.maximum(
            sp_input.indices // (rows_per_part + 1),
            (sp_input.indices - leftover) // rows_per_part)
        # The mask positions are cast to int64 when the indices are int64,
        # and to int32 in every other case.
        if sp_input.indices.dtype == dtypes.int64:
            mask_dtype = dtypes.int64
        else:
            mask_dtype = dtypes.int32
        pieces = []
        for part in range(num_split):
            with ops.name_scope(f"part_{part}"):
                elsewhere = array_ops.where(
                    math_ops.not_equal(assignments, part))
                elsewhere = array_ops.reshape(elsewhere, [-1])
                elsewhere = math_ops.cast(elsewhere, mask_dtype)
                piece = array_ops.sparse_mask(sp_input, elsewhere)
                # The first `leftover` partitions hold one extra row each.
                if part < leftover:
                    piece._indices = math_ops.floor_mod(
                        piece.indices, rows_per_part + 1)
                else:
                    piece._indices = math_ops.floor_mod(
                        piece.indices - leftover, rows_per_part)
            pieces.append(piece)
    return pieces
def _make_train_function(self):
    """Lazily build `self.train_function`.

    Does nothing beyond the compile check when the function already exists.
    Otherwise the feed list is inputs + targets (+ learning phase when
    `self.uses_learning_phase`), the optimizer is wrapped in `Optimizer` if
    it has no `get_updates`, and the resulting `Function` outputs the loss
    followed by the metric tensors.
    """
    self._assert_compiled()
    if self.train_function is not None:
        return
    logging.info("=>Creating training function...")
    feeds = self._feed_inputs + self._feed_targets
    if self.uses_learning_phase:
        feeds += [F.learning_phase()]
    with ops.name_scope('training'):
        with ops.name_scope(self.optimizer.__class__.__name__):
            has_get_updates = hasattr(self.optimizer, 'get_updates')
            if not has_get_updates:
                self.optimizer = Optimizer(
                    optimizer=self.optimizer,
                    global_step=training_util.get_global_step())
            # extra updates (e.g. slim.batch_norm)
            extra_updates = fops.get_collection(fops.GraphKeys.UPDATE_OPS)
            optimizer_updates = self.optimizer.get_updates(
                params=list(self.trainable_weights),
                loss=self.loss)
        self.train_function = Function(
            inputs=feeds,
            outputs=[self.loss] + self.metric_tensors,
            updates=optimizer_updates + extra_updates,
            name='train_function',
            hooks=self.train_hooks,
            **self._function_kwargs)
    logging.info("=>Finish creating training function...")
def __init__(self, type_name, name, container, config, resource_handle_func,
             create_op_func, is_initialized_op_func, serialize_op_func,
             deserialize_op_func):
    """Create the resource handle and register this object as a saveable.

    :param type_name: default name for the name scope when `name` is None
    :param name: name scope; the resolved scope name is reused as the
        resource's shared name and as the save-spec name
    :param container: forwarded to `resource_handle_func`
    :param config: forwarded to `create_op_func`
    :param resource_handle_func: callable building the resource handle
    :param create_op_func: callable building the create op from the handle
    :param is_initialized_op_func: callable building the is-initialized op
    :param serialize_op_func: callable returning the tensor to save
    :param deserialize_op_func: stored on the instance for later use
    """
    with ops.name_scope(name, type_name) as name:
        handle = resource_handle_func(container, shared_name=name, name=name)
        self._resource_handle = handle

    self._is_initialized_op = is_initialized_op_func(handle)
    serialized = serialize_op_func(handle)
    self._create_op = create_op_func(handle, config)
    # A slice spec only matters when saving a slice of a variable, which is
    # not meaningful for a tree variable, so an empty spec is passed.
    save_specs = [saver.BaseSaverBuilder.SaveSpec(serialized, '', name)]
    super(TreeVariableSaveable, self).__init__(handle, save_specs, name)

    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, self)
    resources.register_resource(
        self._resource_handle, self._create_op, self._is_initialized_op)
    self._deserialize_op_func = deserialize_op_func
def _compile_loss(self, loss, loss_weights, targets):
    """Build the total loss tensor and store it on `self.loss`.

    :param loss: loss specification, or an already-built loss tensor when
        `targets` is None
    :param loss_weights: per-output weights, handed to
        `_compile_loss_weights`
    :param targets: targets handed to `_compile_targets`; None means `loss`
        is used as-is
    """
    logging.info("=>Compiling loss...")
    self.metric_names = ['loss']  # map with total_loss
    self.metric_tensors = []
    with ops.name_scope('compile_loss'):
        if targets is not None:  # else loss has already been a tensor
            self._compile_loss_function(loss)
            self._compile_loss_weights(loss_weights)
            self._compile_targets(targets)
            multi_output = len(self.outputs) > 1
            accumulated = 0.
            for idx, prediction in enumerate(self.outputs):
                if idx in self._skip_target_indices:
                    continue
                per_output = self._feed_loss_fns[idx](
                    self.targets[idx], prediction)
                accumulated += self.loss_weights[idx] * per_output
                # Per-output losses are only reported for multi-output models.
                if multi_output:
                    self.metric_tensors.append(per_output)
                    self.metric_names.append(self.output_names[idx] + '_loss')
            loss = accumulated
        regularization = fops.get_collection(
            fops.GraphKeys.REGULARIZATION_LOSSES)
        if regularization:
            loss = math_ops.add_n(regularization + [loss])
    self.loss = loss
def batch_normalization(batch_inputs, mean, variance, offset, scale, epsilon,
                        data_format, name=None):
    """Normalize `batch_inputs` with the given statistics.

    Computes `batch_inputs * a + b` where
    `a = rsqrt(variance + epsilon) * scale` and `b = offset - mean * a`
    (`scale` and `offset` are skipped when None).

    NOTE(review): the previous docstring described `batch_inputs` as
    `[num_components, batch, 1]` or `[channels, batch, h, w]`; that layout
    cannot be confirmed from this function alone -- verify against callers.

    :param batch_inputs: tensor to normalize; its rank decides the broadcast
        shape of the statistics
    :param mean: mean, reshaped for broadcasting
    :param variance: variance, reshaped for broadcasting
    :param offset: optional additive term (may be None)
    :param scale: optional multiplicative term (may be None)
    :param epsilon: value added to the variance before `rsqrt`
    :param data_format: format string; a trailing 'C' selects channel-last
    :param name: optional name scope (default 'batchnorm')
    :return: normalized tensor
    """
    with ops.name_scope(name, 'batchnorm',
                        [batch_inputs, mean, variance, scale, offset]):
        rank = len(batch_inputs.shape)
        if data_format[-1] == 'C':
            shape = [1] * (rank - 1) + [-1]
        else:
            shape = [1, -1] + [1] * (rank - 2)
        mean = array_ops.reshape(mean, shape)
        variance = array_ops.reshape(variance, shape)
        # offset/scale are optional: reshape only when supplied so that the
        # `is not None` branches below are actually reachable.
        if offset is not None:
            offset = array_ops.reshape(offset, shape)
        if scale is not None:
            scale = array_ops.reshape(scale, shape)
        inv = math_ops.rsqrt(variance + epsilon)
        if scale is not None:
            inv *= scale
        a = math_ops.cast(inv, batch_inputs.dtype)
        b = math_ops.cast(
            offset - mean * inv if offset is not None else -mean * inv,
            batch_inputs.dtype)
        if data_format[-1] != 'C':
            a = _to_channel_first_bias(a)
            b = _to_channel_first_bias(b)
        outputs = math_ops.add(math_ops.multiply(batch_inputs, a), b)
    return outputs
def _collect_dense_gradients(self, graph_item, var_op_name):
    """Append collective ops after the gradient is calculated.

    For every replica, the gradient registered for the replica-prefixed
    variable name is reduced through that replica's compressor, and the ops
    that consumed the original gradient are rewired to the reduced one.

    Args:
        graph_item: object holding `var_op_name_to_grad_info_v2`; its
            `updated` flag is set for each processed replica.
        var_op_name: un-prefixed name of the variable op.

    Raises:
        ValueError: if `num_replicas * num_workers` is not greater than 1.
    """
    if self.num_replicas * self.num_workers <= 1:
        raise ValueError(
            'CollectiveOps requires collective group size > 1')

    # Compressors are created lazily, one per replica index looked up.
    compressors = defaultdict(
        lambda: Compressor.create(self._compressor_type, var_op_name))

    conf = CollectiveOpsConfig()
    conf.group_size = len(self.all_canonical_replica_devices)
    conf.group_key = get_collective_keys().get_group_key(
        self.all_canonical_replica_devices)
    conf.instance_key = get_collective_keys().get_instance_key(var_op_name)
    conf.merge_op = 'Add'
    conf.final_op = 'Div'
    if self._spec:
        conf.communication_hint = self._spec

    for replica in range(self.num_replicas):
        replica_scope = replica_prefix(replica)
        prefixed_name = ops.prepend_name_scope(var_op_name, replica_scope)
        graph_item.updated = True
        grad = graph_item.var_op_name_to_grad_info_v2[prefixed_name][0]
        # TODO (Tairui): (3) Merge of reduction for performance
        # The consumers must be captured before the reduction is created.
        consumers = get_consumers(grad.op)
        # The trailing "/" is added for name scope reuse.
        group_scope = replica_prefix(replica) + \
            "/collective-group-{}/".format(self._group)
        with ops.name_scope(group_scope):
            with ops.colocate_with(grad.op):
                reduced = compressors[replica].reduce(grad, conf)
        update_consumers(consumers, grad, reduced)
def grid_sample(inputs, grid, padding_mode='CONSTANT', name='grid_sample'):
    """Bilinearly sample `inputs` at the locations given by `grid`.

    `grid[:, 0]` is read as x and `grid[:, 1]` as y; both are mapped from
    `[-1, 1]` to pixel coordinates via `(v + 1) * size * 0.5`. `inputs` is
    padded by one row and one column at the end so that the `+1` neighbour
    index is always valid.

    :param inputs: 4-D tensor with static height and width in dims 1 and 2
        (presumably NHWC -- confirm against callers)
    :param grid: tensor indexed as `grid[:, 0, :, :]` / `grid[:, 1, :, :]`
    :param padding_mode: mode passed to `array_ops.pad`
    :param name: name scope for the created ops
    :return: weighted sum of the four neighbouring pixels
    """

    def _gather_pixels(image, rows, cols):
        # `image` is the padded tensor, hence the -1 on both spatial sizes.
        batch, pad_h, pad_w = image.get_shape().as_list()[0:-1]
        batch_ids = array_ops.reshape(math_ops.range(batch),
                                      shape=(batch, 1, 1))
        batch_ids = array_ops.tile(batch_ids,
                                   multiples=(1, pad_h - 1, pad_w - 1))
        return array_ops.gather_nd(
            image, array_ops.stack([batch_ids, rows, cols], axis=3))

    def _corner_weight(x_hi, x_lo, y_hi, y_lo):
        # (x_hi - x_lo) * (y_hi - y_lo), with a trailing axis added.
        area = math_ops.multiply(math_ops.subtract(x_hi, x_lo),
                                 math_ops.subtract(y_hi, y_lo))
        return array_ops.expand_dims(area, axis=3)

    with ops.name_scope(name):
        x_s = grid[:, 0, :, :]
        y_s = grid[:, 1, :, :]
        h, w = inputs.get_shape().as_list()[1:-1]
        paddings = array_ops.constant(((0, 0), (0, 1), (0, 1), (0, 0)))
        images = array_ops.pad(inputs, paddings, mode=padding_mode)
        h = int32(h)
        w = int32(w)
        zero = array_ops.zeros([], dtypes.int32)
        x = (math_ops.multiply(x_s + 1., float32(w))) * 0.5
        y = (math_ops.multiply(y_s + 1., float32(h))) * 0.5

        # Integer corners around each sample point, clipped to [0, size].
        left = clip_ops.clip_by_value(int32(math_ops.floor(x)), zero, w)
        right = clip_ops.clip_by_value(left + 1, zero, w)
        top = clip_ops.clip_by_value(int32(math_ops.floor(y)), zero, h)
        bottom = clip_ops.clip_by_value(top + 1, zero, h)

        pixel_tl = _gather_pixels(images, top, left)
        pixel_bl = _gather_pixels(images, bottom, left)
        pixel_tr = _gather_pixels(images, top, right)
        pixel_br = _gather_pixels(images, bottom, right)

        left = float32(left)
        right = float32(right)
        top = float32(top)
        bottom = float32(bottom)

        weight_tl = _corner_weight(right, x, bottom, y)
        weight_bl = _corner_weight(right, x, y, top)
        weight_tr = _corner_weight(x, left, bottom, y)
        weight_br = _corner_weight(x, left, y, top)

        outputs = math_ops.add_n([
            math_ops.multiply(weight_tl, pixel_tl),
            math_ops.multiply(weight_bl, pixel_bl),
            math_ops.multiply(weight_tr, pixel_tr),
            math_ops.multiply(weight_br, pixel_br)
        ])
    return outputs
def _name_scope(self):
    """Yield the name scope for this layer.

    Note: when name is '', no name scope is opened (None is yielded), so
    layers inside this layer are flattened in the graph. Otherwise
    `ops.name_scope(self.name)` is entered and its scope is yielded.
    """
    # Compare by value: identity against a string literal depends on
    # interning and raises a SyntaxWarning on Python 3.8+.
    if self.name == '':
        yield
    else:
        with ops.name_scope(self.name) as scope:
            yield scope
def zero_state(self, batch_size, dtype):
    """Return an initial (zero) state tuple for this `AttentionWrapper`.

    **NOTE** Please see the initializer documentation for details of how
    to call `zero_state` if using an `AttentionWrapper` with a
    `BeamSearchDecoder`.

    Args:
      batch_size: `0D` integer tensor: the batch size.
      dtype: The internal state data type.

    Returns:
      An `AttentionWrapperState` tuple containing zeroed out tensors and,
      possibly, empty `TensorArray` objects.

    Raises:
      ValueError: (or, possibly at runtime, InvalidArgument), if
        `batch_size` does not match the output size of the encoder passed
        to the wrapper object at initialization time.
    """
    scope_name = type(self).__name__ + "ZeroState"
    with ops.name_scope(scope_name, values=[batch_size]):
        # A user-supplied initial cell state wins over the cell's zeros.
        if self._initial_cell_state is None:
            cell_state = self._cell.zero_state(batch_size, dtype)
        else:
            cell_state = self._initial_cell_state

        prefix = "When calling zero_state of AttentionWrapper %s: " % self._base_name
        error_message = prefix + (
            "Non-matching batch sizes between the memory "
            "(encoder output) and the requested batch size. Are you using "
            "the BeamSearchDecoder? If so, make sure your encoder output has "
            "been tiled to beam_width via tf.contrib.seq2seq.tile_batch, and "
            "the batch_size= argument passed to zero_state is "
            "batch_size * beam_width.")

        # Tie the cell state to the batch-size assertions.
        checks = self._batch_size_checks(batch_size, error_message)
        with tf.control_dependencies(checks):
            cell_state = nest.map_structure(
                lambda state: tf.identity(state, name="checked_cell_state"),
                cell_state)

        initial_time = tf.zeros([], dtype=tf.int32)
        initial_attention = _zero_state_tensors(
            self._attention_layer_size, batch_size, dtype)
        initial_alignments = self._item_or_tuple(
            mechanism.initial_alignments(batch_size, dtype)
            for mechanism in self._attention_mechanisms)
        # The alignment history is read several times, so
        # clear_after_read must be False.
        history = self._item_or_tuple(
            tf.TensorArray(dtype=dtype, size=0, clear_after_read=False,
                           dynamic_size=True)
            if self._alignment_history else ()
            for _ in self._attention_mechanisms)
        initial_attention_state = self._item_or_tuple(
            mechanism.initial_state(batch_size, dtype)
            for mechanism in self._attention_mechanisms)

        return tf.contrib.seq2seq.AttentionWrapperState(
            cell_state=cell_state,
            time=initial_time,
            attention=initial_attention,
            alignments=initial_alignments,
            alignment_history=history,
            attention_state=initial_attention_state)
def get_initial_state(self, inputs):
    """Build zero initial states, one per entry of `self.cell.state_size`.

    A zero column is derived from `inputs` (zeros-like, summed over axes 1
    and 2, then expanded on axis 1) and tiled to each state size.

    :param inputs: input tensor; the inline shape notes assume (b, t, i)
    :return: list of tiled zero tensors
    """
    with ops.name_scope('initial_state'):
        seed = array_ops.zeros_like(inputs)  # (b, t, i)
        seed = math_ops.reduce_sum(seed, axis=(1, 2))  # (b,)
        seed = array_ops.expand_dims(seed, axis=1)  # (b, 1)
        states = []
        for size in to_list(self.cell.state_size):
            states.append(array_ops.tile(seed, [1, size]))
        return states
def __call__(self, *args, **kwargs):
    """Update the metric state, then return the metric result.

    The result is computed under a control dependency on the update ops
    returned by `update_state`.

    :param args: positional arguments forwarded to `update_state`
    :param kwargs: keyword arguments forwarded to `update_state`
    :return: value of `self.result()`
    """
    with ops.name_scope(self.name):
        update_ops = to_list(self.update_state(*args, **kwargs))
        with fops.control_dependencies(update_ops):
            outcome = self.result()
        # Attach this metric to every result tensor as metadata; the
        # instance is later used to reset variable state after each epoch
        # of training.
        for tensor in to_list(nest.flatten(outcome)):
            tensor._metric_obj = self
        return outcome
def _make_eval_function(self):
    """Lazily build `self.eval_function`.

    Does nothing beyond the compile check when the function already exists.
    Otherwise the feed list is inputs + targets (+ learning phase when
    `self.uses_learning_phase`) and the `Function` outputs the loss followed
    by the metric tensors.
    """
    self._assert_compiled()
    if self.eval_function is not None:
        return
    logging.info("=>Creating evaluation function...")
    feeds = self._feed_inputs + self._feed_targets
    if self.uses_learning_phase:
        feeds += [F.learning_phase()]
    fetches = [self.loss] + self.metric_tensors
    with ops.name_scope('evaluation'):
        self.eval_function = Function(
            inputs=feeds,
            outputs=fetches,
            name='eval_function',
            hooks=self.val_hooks,
            **self._function_kwargs)
    logging.info("=>Finish creating evaluation function...")
def _make_predict_function(self):
    """Lazily build `self.predict_function`.

    Does nothing beyond the compile check when the function already exists.
    Otherwise the feed list is the model inputs (+ learning phase when
    `self.uses_learning_phase`) and the `Function` outputs `self.outputs`.
    """
    self._assert_compiled()
    if self.predict_function is None:
        logging.info("=>Creating predict function...")
        # Copy before extending: `+=` on the attribute itself would append
        # the learning-phase tensor to `self._feed_inputs` in place and leak
        # it into the train/eval input lists built from the same attribute.
        inputs = list(self._feed_inputs)
        if self.uses_learning_phase:
            inputs += [F.learning_phase()]
        with ops.name_scope('predict'):
            self.predict_function = Function(
                inputs=inputs,
                outputs=self.outputs,
                hooks=self._predict_hooks,
                name='predict_function',
                **self._function_kwargs)
        logging.info("=>Finish creating predict function...")
def _collect_sparse_gradients(self, graph_item, var_op_name):
    """Append collective ops after the gradient is calculated.

    For every replica, the indices and values of the sparse gradient are
    each all-gathered across the collective group, and both the data
    consumers and the control consumers of the original tensors are rewired
    to the gathered ones.

    Args:
        graph_item: object holding `var_op_name_to_grad_info_v2`; its
            `updated` flag is set for each processed replica.
        var_op_name: un-prefixed name of the variable op.

    Raises:
        NotImplementedError: if more than one worker is used and
            `ENV.AUTODIST_INTERNAL_TF` is not set.
        ValueError: if `num_replicas * num_workers` is not greater than 1.
    """
    if self.num_workers > 1 and not ENV.AUTODIST_INTERNAL_TF.value:
        raise NotImplementedError(
            'Currently the collective NCCL AllGather is not supported in TensorFlow release. '
            'Please choose another strategy.')
    conf = {}
    if self._spec:
        conf = {'communication_hint': self._spec}
    if self._compressor_type:
        logging.warning(
            'AllGather currently does not support AutoDist compressor so it skips.'
        )
    if self.num_replicas * self.num_workers <= 1:
        raise ValueError(
            'CollectiveOps requires collective group size > 1')
    for i in range(0, self.num_replicas):
        op_name = ops.prepend_name_scope(var_op_name, replica_prefix(i))
        graph_item.updated = True
        grad, _, _ = graph_item.var_op_name_to_grad_info_v2[op_name]
        # TODO (Tairui): (3) Merge of reduction for performance
        # Capture every consumer before the gather ops are created.
        indices_c_ops = grad.indices.consumers()
        indices_cc_ops = get_control_consumers(grad.indices.op)
        values_c_ops = grad.values.consumers()
        values_cc_ops = get_control_consumers(grad.values.op)
        with ops.name_scope(replica_prefix(i)):
            with ops.colocate_with(grad.indices.op):
                new_indices = collective_ops.all_gather(
                    grad.indices,
                    self.num_replicas * self.num_workers,
                    get_collective_keys().get_group_key(
                        self.all_canonical_replica_devices),
                    get_collective_keys().get_instance_key(
                        var_op_name + '-indices'),
                    **conf)
            with ops.colocate_with(grad.values.op):
                new_values = collective_ops.all_gather(
                    grad.values,
                    self.num_replicas * self.num_workers,
                    get_collective_keys().get_group_key(
                        self.all_canonical_replica_devices),
                    get_collective_keys().get_instance_key(
                        var_op_name + '-values'),
                    **conf)
        update_consumers(indices_c_ops, grad.indices, new_indices)
        update_control_consumers(indices_cc_ops, grad.indices.op,
                                 new_indices.op)
        update_consumers(values_c_ops, grad.values, new_values)
        # Control consumers are rewired op-to-op, matching the indices call
        # above (previously the tensor `new_values` was passed here).
        update_control_consumers(values_cc_ops, grad.values.op,
                                 new_values.op)
def affine_grid(theta, size: (list, tuple), name='affine_grid'):
    """Generate a sampling grid by applying `theta` to a regular mesh.

    A mesh over `[-1, 1]` with `size[1]` x-samples and `size[2]` y-samples
    is flattened into homogeneous coordinates `[x, y, 1]`, tiled `size[0]`
    times, multiplied by `theta` and reshaped to
    `(size[0], 2, size[1], size[2])`.

    :param theta: transform matrices, cast with `float32` (presumably of
        shape (size[0], 2, 3) -- confirm against callers)
    :param size: three-element list/tuple read as size[0], size[1], size[2]
    :param name: name scope for the created ops
    :return: grid tensor of shape (size[0], 2, size[1], size[2])
    """
    with ops.name_scope(name):
        xs = gen_math_ops.lin_space(-1., 1., size[1])
        ys = gen_math_ops.lin_space(-1., 1., size[2])
        # NOTE(review): meshgrid(xs, ys) yields (len(ys), len(xs)) arrays by
        # default, while the final reshape uses (size[1], size[2]); confirm
        # the axis order is intended when size[1] != size[2].
        mesh_x, mesh_y = array_ops.meshgrid(xs, ys)
        flat_x = array_ops.reshape(mesh_x, shape=(-1, ))
        flat_y = array_ops.reshape(mesh_y, shape=(-1, ))
        homogeneous = array_ops.stack(
            [flat_x, flat_y, array_ops.ones_like(flat_x)])
        homogeneous = array_ops.expand_dims(homogeneous, axis=0)
        repeats = array_ops.stack([size[0], 1, 1])
        homogeneous = array_ops.tile(homogeneous, multiples=repeats)
        homogeneous = float32(homogeneous)
        theta = float32(theta)
        transformed = math_ops.matmul(theta, homogeneous)
        return array_ops.reshape(
            transformed, shape=(size[0], 2, size[1], size[2]))
def graph_scope(name, default_name=None, values=None):
    """Generator that yields a `GraphScope` handler inside a name scope.

    One `Input` is created per value; after the caller resumes the
    generator, a `Network` is built from those inputs and the handler's
    outputs and registered via `graph_ops.build_node`.

    :param name: name for the name scope
    :param default_name: default name for the name scope
    :param values: tensor or list of tensors; each must pass
        `F.assert_tensor_traceable`
    :raises ValueError: if `values` is None
    """
    from tensorlib.engine import Input
    if values is None:
        raise ValueError("Argument `values` can not be None.")
    values = to_list(values)
    for tensor in values:
        F.assert_tensor_traceable(tensor)
    with ops.name_scope(name=name, default_name=default_name,
                        values=values) as scope:
        placeholders = [
            Input(batch_input_shape=F.int_shape(tensor), dtype=tensor.dtype)
            for tensor in values
        ]
        inputs = unpack_singleton(placeholders)
        handler = GraphScope(scope=scope, inputs=inputs)
        yield handler
        # Reached once the caller has populated `handler.outputs`.
        net = Network(inputs=inputs, outputs=handler.outputs, name=scope)
        graph_ops.build_node(net, values, to_list(handler.outputs))
        del handler
def __init__(self, type_name, name, container, config, resource_handle_func,
             create_op_func, is_initialized_op_func, serialize_op_func,
             deserialize_op_func):
    """Create the resource handle and register this object as a saveable.

    :param type_name: default name for the name scope when `name` is None
    :param name: name scope; the resolved scope name is reused as the
        resource's shared name and as the save-spec name
    :param container: forwarded to `resource_handle_func`
    :param config: forwarded to `create_op_func`
    :param resource_handle_func: callable building the resource handle
    :param create_op_func: callable building the create op from the handle
    :param is_initialized_op_func: callable building the is-initialized op
    :param serialize_op_func: callable returning the tensor to save
    :param deserialize_op_func: stored on the instance for later use
    """
    with ops.name_scope(name, type_name) as name:
        handle = resource_handle_func(container, shared_name=name, name=name)
        self._resource_handle = handle

    self._is_initialized_op = is_initialized_op_func(handle)
    serialized = serialize_op_func(handle)
    self._create_op = create_op_func(handle, config)
    # A slice spec only matters when saving a slice of a variable, which is
    # not meaningful for a tree variable, so an empty spec is passed.
    save_specs = [saver.BaseSaverBuilder.SaveSpec(serialized, '', name)]
    super(TreeVariableSaveable, self).__init__(handle, save_specs, name)

    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, self)
    resources.register_resource(
        self._resource_handle, self._create_op, self._is_initialized_op)
    self._deserialize_op_func = deserialize_op_func
def add_summary_ops(self, name, value):
    """Create a scalar summary under this object's name scope.

    The summary op is also added to the `SUMMARIES` graph collection.

    :param name: summary name
    :param value: tensor to summarize
    """
    with ops.name_scope(self.name):
        scalar_summary = summary.scalar(name=name, tensor=value)
        fops.add_to_collection(fops.GraphKeys.SUMMARIES, scalar_summary)
def rnn(step_fn, inputs, initial_states, go_backwards=False, unroll=False,
        input_length=None, name='rnn_block'):
    """Iterate `step_fn` over the time dimension of `inputs`.

    `inputs` is transposed to time-major (axes 0 and 1 swapped). With
    `unroll` the steps are emitted as a Python loop; otherwise a
    `while_loop` over `TensorArray`s is built.

    :param step_fn: callable `(input_t, states) -> (output_t, new_states)`
    :param inputs: tensor of rank >= 3; axis 1 is iterated
    :param initial_states: states passed to the first step
    :param go_backwards: iterate the time axis in reverse
    :param unroll: unroll in Python; needs a static number of time steps
    :param input_length: passed as `maximum_iterations` to the while loop
    :param name: name scope for the created ops
    :return: `(last_output, outputs, new_states)`; `outputs` is transposed
        back so that axes 0 and 1 are swapped again
    :raises ValueError: if `inputs` has fewer than 3 dimensions
    """
    with ops.name_scope(name):
        rank = ndim(inputs)
        if rank < 3:
            raise ValueError("Input should be at least 3D")
        time_major = [1, 0] + list(range(2, rank))
        inputs = array_ops.transpose(inputs, perm=time_major,
                                     name='to_time_major')
        if unroll:
            assert int_shape(inputs)[0] is not None,\
                "Unrolling requires a fixed number of time steps"
            states = initial_states
            per_step_states = []
            per_step_outputs = []
            frames = array_ops.unstack(inputs)
            if go_backwards:
                frames.reverse()
            for frame in frames:
                outputs, states = step_fn(frame, states)
                per_step_outputs.append(outputs)
                per_step_states.append(states)
            last_output = per_step_outputs[-1]
            new_states = per_step_states[-1]
            outputs = array_ops.stack(per_step_outputs)
        else:
            if go_backwards:
                inputs = array_ops.reverse(inputs, axis=0)
            states = tuple(initial_states)
            time_steps = array_ops.shape(inputs)[0]
            # One probe step, used only to learn the output dtype.
            outputs, _ = step_fn(inputs[0], initial_states)
            output_ta = tensor_array_ops.TensorArray(
                dtype=outputs.dtype,
                size=time_steps,
                tensor_array_name='output_ta')
            input_ta = tensor_array_ops.TensorArray(
                dtype=inputs.dtype,
                size=time_steps,
                tensor_array_name='input_ta')
            # unstack inputs and write into input array
            input_ta = input_ta.unstack(inputs)
            time = array_ops.constant(0, dtype='int32', name='time')

            def _step(_time, _output_ta, *_states):
                frame = input_ta.read(_time)
                output, _new_states = step_fn(frame, tuple(_states))
                # Keep loop-variable shapes stable across iterations.
                for old, new in zip(_states, _new_states):
                    new.set_shape(old.get_shape())
                _output_ta = _output_ta.write(_time, output)
                return (_time + 1, _output_ta) + tuple(_new_states)

            loop_result = control_flow_ops.while_loop(
                cond=lambda _time, *_: _time < time_steps,
                body=_step,
                loop_vars=(time, output_ta) + states,
                parallel_iterations=32,
                swap_memory=True,
                maximum_iterations=input_length)
            last_time = loop_result[0]
            output_ta = loop_result[1]
            new_states = loop_result[2:]
            outputs = output_ta.stack()
            last_output = output_ta.read(last_time - 1)
        batch_major = [1, 0] + list(range(2, ndim(outputs)))
        outputs = array_ops.transpose(outputs, perm=batch_major)
    return last_output, outputs, new_states
def _compile_metrics(self, metrics):
    """
    Compile metrics to the desired format.

    Each output maps to a list of metrics; an item inside `metrics` can be
    a metric specification (resolved through `metric_module.get`) or a
    tensor.
    Note: when metrics are in class format given as a list, their number is
    checked against `self.outputs`.
    When metrics are in tensor format no such check is done, because the
    metric calculation was already handled by the user inside `model_fn`.
    :param metrics: None or a nested list or dict
    :raises ValueError: class-format list whose length differs from the
        number of outputs
    :raises TypeError: `metrics` is neither empty, a list nor a dict
    """
    logging.info("=>Compiling metrics...")
    is_tensor = False
    if not metrics:
        metrics = [[]] * len(self.outputs)
    elif isinstance(metrics, list):
        if F.is_tensor(metrics[0]):
            is_tensor = True
            metrics = [('metric_%d' % (i+1), m)
                       for i, m in enumerate(metrics)]
        elif len(metrics) != len(self.outputs):
            raise ValueError("Number of metric inside `metrics`"
                             " %d is not compatible with number"
                             " of `self.outputs` %d" % (
                                 len(metrics), len(self.outputs)))
    elif isinstance(metrics, dict):
        first_key = list(metrics.keys())[0]
        if F.is_tensor(metrics[first_key]):
            is_tensor = True
            metrics = list(metrics.items())
        else:
            metrics = [metrics.get(name, []) for name in self.output_names]
    else:
        raise TypeError("Unexpected type of metrics: " + str(type(metrics)))

    with ops.name_scope('compile_metric'):
        if is_tensor:
            self._compile_metric_tensors(metrics)
            return

        # Must handle sparse situation carefully!
        def _resolve(spec, loss_fn):
            sparse = isinstance(loss_fn, losses.SparseCategoricalCrossEntropy)
            if sparse and spec in {'accuracy', 'acc'}:
                return metric_module.SparseCategoricalAccuracy()
            return metric_module.get(spec)

        named_results = []
        multi_output = len(self.output_names) > 1
        for i, output in enumerate(self.outputs):
            if i in self._skip_target_indices:
                continue
            target = self.targets[i]
            specs = to_list(metrics[i])
            loss_function = self.loss_functions[i]
            for j, spec in enumerate(specs):
                metric = _resolve(spec, loss_function)
                metric_name = getattr(metric, 'name', 'metric_%d' % j)
                metric_result = metric(target, output)
                if multi_output:
                    metric_name = self.output_names[i] + '_' + metric_name
                named_results.append((metric_name, metric_result))
        self._compile_metric_tensors(named_results)
def add_weight(self, name, shape=None, dtype=None, initial_value=None,
               initializer=None, regularizer=None, trainable=None,
               constraint=None, **kwargs):
    """
    Add a variable weight to layer
    :param name: Name of weights
    :param shape: Shape of weights; required when `initial_value` is None
    :param dtype: Data type of weights; falls back to `self.dtype`
    :param initial_value: Initial value of weights
    :param initializer: Initializer for weights; required when
        `initial_value` is None
    :param regularizer: Regularizer for weights; its loss is added to the
        `REGULARIZATION_LOSSES` collection
    :param trainable: A boolean, whether the weight should
        be trained via backprop or not (assuming
        that the layer itself is also trainable).
    :param constraint: Optional constraint instance
    :param kwargs: extra arguments forwarded to `variables.Variable`
    :return weight itself
    :raises ValueError: missing shape/initializer, or a trainable weight
        combined with `VariableSynchronization.ON_READ`
    """
    dtype = dtype or self.dtype
    if initial_value is None:
        if shape is None:
            raise ValueError("When initial_value is not specified,"
                             " shape for initializing must be specified.")
        if initializer is None:
            raise ValueError(
                "When initial_value is not specified,"
                " initializer for initializing must be specified.")
        make_value = initializers.get(initializer)
        initial_value = make_value(shape, dtype=dtype)

    sync_mode = kwargs.get('synchronization',
                           variables.VariableSynchronization.AUTO)
    if sync_mode == variables.VariableSynchronization.ON_READ:
        if trainable:
            raise ValueError("Synchronization value can be set to"
                             " VariableSynchronization.ON_READ only"
                             " for non-trainable variables")
        trainable = False
    elif trainable is None:
        trainable = True

    weight = variables.Variable(
        initial_value=initial_value,
        trainable=trainable,
        dtype=dtype,
        constraint=constraint,
        name=name,
        **kwargs)

    if regularizer is not None:
        with ops.name_scope('weight_regularizer'):
            penalty = regularizers.get(regularizer)(weight)
            ops.add_to_collection(
                fops.GraphKeys.REGULARIZATION_LOSSES, penalty)

    bucket = (self._trainable_weights if trainable
              else self._non_trainable_weights)
    bucket.append(weight)
    return weight
def __call__(self, y_true, y_pred, sample_weight=None):
    """Compute the mean loss and record it as a scalar summary.

    :param y_true: targets forwarded to `self.forward`
    :param y_pred: predictions forwarded to `self.forward`
    :param sample_weight: accepted but not used by this method
    :return: mean of the values returned by `self.forward`
    """
    with ops.name_scope(self.name):
        mean_loss = math_ops.reduce_mean(self.forward(y_true, y_pred))
        self.add_summary_ops(self.name + '_loss', mean_loss)
        return mean_loss
def random_central_crop(image, minval, maxval):
    """Crop the central region of `image` with a random fraction.

    The kept fraction is drawn uniformly from `[minval, maxval)` as a
    float64 scalar; the same margin is removed on both sides of the height
    and width axes.

    :param image: 3-D or 4-D tensor (with a leading batch axis when 4-D)
    :param minval: lower bound of the crop ratio, within [0, 1]
    :param maxval: upper bound of the crop ratio, within [0, 1]
    :return: cropped image; height and width are left unknown in the
        static shape
    :raises ValueError: ratio bounds outside [0, 1], or rank not 3 or 4
    """
    with ops.name_scope(None, 'central_crop', [image]):
        image = ops.convert_to_tensor(image, name='image')
        if any(bound < 0 or bound > 1 for bound in (minval, maxval)):
            raise ValueError('crop ratio range must be between 0 and 1.')

        _AssertAtLeast3DImage(image)
        rank = image.get_shape().ndims
        if rank not in (3, 4):
            raise ValueError(
                '`image` should either be a Tensor with rank = 3 or '
                'rank = 4. Had rank = {}.'.format(rank))
        batched = rank == 4

        # Return the `idx`-th dimension of `tensor`, along with a boolean
        # signifying whether that dimension is dynamic.
        def _get_dim(tensor, idx):
            static_size = tensor.get_shape()[idx].value
            if static_size is None:
                return array_ops.shape(tensor)[idx], True
            return static_size, False

        # Height, width and depth (plus batch size for 4-D input).
        if batched:
            img_bs = image.get_shape()[0]
            img_h, _ = _get_dim(image, 1)
            img_w, _ = _get_dim(image, 2)
            img_d = image.get_shape()[3]
        else:
            img_h, _ = _get_dim(image, 0)
            img_w, _ = _get_dim(image, 1)
            img_d = image.get_shape()[2]

        central_fraction = tf.random_uniform(
            [], minval=minval, maxval=maxval, dtype=tf.float64)

        # Margins removed from the top/left; the kept size is the full
        # size minus twice the margin.
        height_d = math_ops.to_double(img_h)
        top = math_ops.to_int32((height_d - height_d * central_fraction) / 2)
        width_d = math_ops.to_double(img_w)
        left = math_ops.to_int32((width_d - width_d * central_fraction) / 2)

        kept_h = img_h - top * 2
        kept_w = img_w - left * 2

        if batched:
            bbox_begin = array_ops.stack([0, top, left, 0])
            bbox_size = array_ops.stack([-1, kept_h, kept_w, -1])
        else:
            bbox_begin = array_ops.stack([top, left, 0])
            bbox_size = array_ops.stack([kept_h, kept_w, -1])

        image = array_ops.slice(image, bbox_begin, bbox_size)

        # Restore what is statically known: depth (and batch size).
        if batched:
            image.set_shape([img_bs, None, None, img_d])
        else:
            image.set_shape([None, None, img_d])
        return image
def _get_accumulation_ops(graph_item, gradient, target, num_accum_required):
    """Insert a conditional accumulator that aggregates `gradient`.

    Dense gradients use a `ConditionalAccumulator`, sparse ones a
    `SparseConditionalAccumulator`. Consumers (data and control) of the
    original gradient tensors are rewired to the aggregated gradient.

    Args:
        graph_item: object whose `trainable_var_op_to_var` decides whether
            `target` gets an accumulator.
        gradient: an `ops.Tensor`, or an object with `values`, `indices`
            and `dense_shape`.
        target: tensor whose `.op` is the variable op.
        num_accum_required: number of gradients to wait for before
            `take_grad` returns.

    Returns:
        A pair of dicts keyed by the variable op:
        `{var_op: (indices_or_None, aggregated_values)}` and
        `{var_op: accum_apply_op}`. Both are empty when `target.op` is not
        a trainable variable.
    """

    def _accumulate_dense(var_op, grad):
        tensor = variable_utils.get_read_var_tensor(var_op)
        accumulator = data_flow_ops.ConditionalAccumulator(
            grad.dtype,
            shape=tensor.get_shape(),
            shared_name=var_op.name + "/grad_accum")
        # Snapshot the consumers before creating the apply op.
        grad_consumers = list(grad.consumers())
        apply_op = accumulator.apply_grad(
            grad,
            local_step=MAX_INT64,
            name=grad.op.name + '_accum_apply_grad')
        aggregated = accumulator.take_grad(
            num_accum_required, name=var_op.name + '_take_grad')
        update_consumers(grad_consumers, grad, aggregated)
        update_control_consumers(get_control_consumers(grad.op), grad.op,
                                 aggregated.op)
        return apply_op, aggregated

    def _accumulate_sparse(var_op, grad, indices, dense_shape):
        slices = ops.IndexedSlices(
            values=grad, indices=indices, dense_shape=dense_shape)
        accumulator = data_flow_ops.SparseConditionalAccumulator(
            grad.dtype,
            shape=grad.shape,
            shared_name=var_op.name + "/grad_accum")
        # Snapshot the consumers before creating the apply op.
        indices_consumers = list(indices.consumers())
        grad_consumers = list(grad.consumers())
        apply_op = accumulator.apply_indexed_slices_grad(
            slices,
            local_step=MAX_INT64,
            name=grad.op.name + '_accum_apply_grad')
        aggregated = accumulator.take_indexed_slices_grad(
            num_accum_required, name=var_op.name + '_take_grad')
        # Keep the index dtype the consumers were built against.
        new_indices = aggregated.indices
        if indices.dtype != aggregated.indices.dtype:
            new_indices = math_ops.cast(aggregated.indices, indices.dtype)
        aggregated = ops.IndexedSlices(
            values=aggregated.values,
            indices=new_indices,
            dense_shape=aggregated.dense_shape)
        assert isinstance(aggregated, ops.IndexedSlices)
        update_consumers(indices_consumers, indices, aggregated.indices)
        update_consumers(grad_consumers, grad, aggregated.values)
        update_control_consumers(get_control_consumers(indices.op),
                                 indices.op, aggregated.indices.op)
        update_control_consumers(get_control_consumers(grad.op), grad.op,
                                 aggregated.values.op)
        return apply_op, aggregated

    # Aggregate gradients from different workers using
    # ConditionalAccumulator; non-trainable targets get no accumulator.
    if target.op not in graph_item.trainable_var_op_to_var:
        logging.debug(
            "Gradient for non-trainable variable %s is created, "
            "do not insert accumulator for aggregating this gradient" %
            target.op.name)
        return {}, {}

    var_op = target.op
    is_dense = isinstance(gradient, ops.Tensor)
    with ops.device(var_op.device), ops.name_scope(""):
        if is_dense:
            accum_apply_op, agg_grad = _accumulate_dense(var_op, gradient)
        else:
            accum_apply_op, agg_grad = _accumulate_sparse(
                var_op, gradient.values, gradient.indices,
                gradient.dense_shape)

    if is_dense:
        aggregated_pair = (None, agg_grad)
    else:
        aggregated_pair = (agg_grad.indices, agg_grad.values)
    var_op_to_agg_grad = {var_op: aggregated_pair}
    var_op_to_accum_apply_op = {var_op: accum_apply_op}
    return var_op_to_agg_grad, var_op_to_accum_apply_op