def testIsCondSwitch(self): g = self.build_test_graph() cond_switch = [ "OuterCond/cond/Switch", "OuterCond/cond/OuterWhile/while/Switch", "OuterCond/cond/OuterWhile/while/NestedCond/cond/Switch", "OuterCond/cond/OuterWhile/while/NestedCond/cond/Add/Switch", "OuterCond/cond/OuterWhile/while/NestedCond/cond/Add_1/Switch", "OuterCond/cond/Add/Switch", ] for n in g.get_operations(): if control_flow_util.IsSwitch(n): self.assertTrue( control_flow_util.IsCondSwitch(n) != control_flow_util.IsLoopSwitch(n)) if n.name in cond_switch: self.assertTrue(control_flow_util.IsSwitch(n)) self.assertTrue(control_flow_util.IsCondSwitch(n), msg="Mismatch for {}".format(n.name)) self.assertFalse(control_flow_util.IsLoopSwitch(n), msg="Mismatch for {}".format(n.name)) else: self.assertFalse(control_flow_util.IsCondSwitch(n), msg="Mismatch for {}".format(n.name))
def _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state): """Update pending count for the inputs of op and enqueue ready ops.""" for x in op.inputs: pending_count[x.op] -= 1 ready = (pending_count[x.op] == 0) if loop_state and not ready: ready = pending_count[x.op] > 0 and control_flow_util.IsLoopSwitch(x.op) if ready: if control_flow_util.IsLoopExit(x.op): # if x is an exit without real gradient, defer processing them. grad_state = loop_state.GetGradState(x.op, before=False) grad_state.deferred_exits.append(x) grad_state.pending_exits_count -= 1 if grad_state.pending_exits_count == 0: # We now have all the exits so process them. has_not_none_grad = False for y in grad_state.deferred_exits: if _HasAnyNotNoneGrads(grads, y.op): has_not_none_grad = True queue.append(y.op) else: grad_state.unused_exits.append(y) if has_not_none_grad: # For an unused exit, if it has trainable outputs, backprop # a zero gradient. Otherwise, just ignore it. for y in grad_state.unused_exits: if _IsTrainable(y): _SetGrad(grads, y, loop_state.ZerosLikeForExit(y)) queue.append(y.op) else: # All exits are "unused" so use None as gradient. for y in grad_state.unused_exits: queue.append(y.op) else: queue.append(x.op)
def _SetGrad(grads, t, grad): """Sets gradient "grad" in "grads" for tensor "t".""" op = t.op op_grads = grads.get(op) if not op_grads: op_grads = [[] for _ in xrange(len(op.outputs))] grads[op] = op_grads t_grads = op_grads[t.value_index] if isinstance(t_grads, list): t_grads.append(grad) else: assert control_flow_util.IsLoopSwitch(op) op_grads[t.value_index] = grad
def while_loop_op(op): """Returns true if op is one of the special ops of in a while loop. Args: op: A tf.Operation. Returns: True if the given op is one of [Switch, Merge, Enter, Exit, NextIteration, LoopCond], which are all building blocks for TF while loops. """ return (control_flow_util.IsLoopSwitch(op) or control_flow_util.IsLoopMerge(op) or control_flow_util.IsLoopEnter(op) or control_flow_util.IsLoopExit(op) or loop_cond_op(op) or op.type in ('RefNextIteration', 'NextIteration'))
def _AggregatedGrads(grads, op, loop_state, aggregation_method=None): """Get the aggregated gradients for op. Args: grads: The map of memoized gradients. op: The op to get gradients for. loop_state: An object for maintaining the state of the while loops in the graph. It is of type ControlFlowState. None if the graph contains no while loops. aggregation_method: Specifies the method used to combine gradient terms. Accepted values are constants defined in the class `AggregationMethod`. Returns: A list of gradients, one per each output of `op`. If the gradients for a particular output is a list, this function aggregates it before returning. Raises: TypeError: if the incoming grads are not Tensors or IndexedSlices. ValueError: if the arguments are invalid. """ if aggregation_method is None: aggregation_method = AggregationMethod.DEFAULT if aggregation_method not in [ AggregationMethod.ADD_N, AggregationMethod.EXPERIMENTAL_TREE, AggregationMethod.EXPERIMENTAL_ACCUMULATE_N ]: raise ValueError("Invalid aggregation_method specified %s." % aggregation_method) out_grads = _GetGrads(grads, op) for i, out_grad in enumerate(out_grads): if loop_state: if isinstance(out_grad, (ops.Tensor, ops.IndexedSlices)): assert control_flow_util.IsLoopSwitch(op) continue # Grads have to be Tensors or IndexedSlices if (isinstance(out_grad, collections.Sequence) and not all([ isinstance(g, (ops.Tensor, ops.IndexedSlices)) for g in out_grad if g is not None ])): raise TypeError("gradients have to be either all Tensors " "or all IndexedSlices") # Aggregate multiple gradients, and convert [] to None. if out_grad: if len(out_grad) < 2: used = "nop" out_grads[i] = out_grad[0] elif all( [isinstance(g, ops.Tensor) for g in out_grad if g is not None]): tensor_shape = _AccumulatorShape(out_grad) if (aggregation_method == AggregationMethod.EXPERIMENTAL_ACCUMULATE_N and len(out_grad) > 2 and tensor_shape.is_fully_defined()): # The benefit of using AccumulateN is that its inputs can be combined # in any order and this can allow the expression to be evaluated with # a smaller memory footprint. When used with gpu_allocator_retry, # it is possible to compute a sum of terms which are much larger than # total GPU memory. # AccumulateN can currently only be used if we know the shape for # an accumulator variable. If this is not known, or if we only have # 2 grads then we fall through to the "tree" case below. used = "accumulate_n" out_grads[i] = math_ops.accumulate_n(out_grad) elif aggregation_method in [ AggregationMethod.EXPERIMENTAL_TREE, AggregationMethod.EXPERIMENTAL_ACCUMULATE_N ]: # Aggregate all gradients by doing pairwise sums: this may # reduce performance, but it can improve memory because the # gradients can be released earlier. # # TODO (vrv): Consider replacing this with a version of id:2522 gh:2523 # tf.AddN() that eagerly frees its inputs as soon as they are # ready, so the order of this tree does not become a problem. used = "tree" with ops.name_scope(op.name + "_gradient_sum"): running_sum = out_grad[0] for grad in out_grad[1:]: running_sum = math_ops.add_n([running_sum, grad]) out_grads[i] = running_sum else: used = "add_n" out_grads[i] = _MultiDeviceAddN(out_grad) logging.vlog(2, " _AggregatedGrads %d x %s using %s", len(out_grad), tensor_shape, used) else: out_grad = math_ops._as_indexed_slices_list( [g for g in out_grad if g is not None]) out_grad = [_HandleNestedIndexedSlices(x) for x in out_grad] # Form IndexedSlices out of the concatenated values and # indices. out_grads[i] = ops.IndexedSlices( array_ops.concat([x.values for x in out_grad], 0), array_ops.concat([x.indices for x in out_grad], 0), out_grad[0].dense_shape) else: # not out_grad # out_grads[i] is [], thus its aggregation is simply None. out_grads[i] = None return out_grads
def _AggregatedGrads(grads, op, gradient_uid, loop_state, aggregation_method=None): """Get the aggregated gradients for op. Args: grads: The map of memoized gradients. op: The op to get gradients for. gradient_uid: A unique identifier within the graph indicating which invocation of gradients is being executed. Used to cluster ops for compilation. loop_state: An object for maintaining the state of the while loops in the graph. It is of type ControlFlowState. None if the graph contains no while loops. aggregation_method: Specifies the method used to combine gradient terms. Accepted values are constants defined in the class `AggregationMethod`. Returns: A list of gradients, one per each output of `op`. If the gradients for a particular output is a list, this function aggregates it before returning. Raises: TypeError: if the incoming grads are not Tensors or IndexedSlices. ValueError: if the arguments are invalid. """ if aggregation_method is None: aggregation_method = AggregationMethod.DEFAULT if aggregation_method not in [ AggregationMethod.ADD_N, AggregationMethod.EXPERIMENTAL_TREE, AggregationMethod.EXPERIMENTAL_ACCUMULATE_N ]: raise ValueError("Invalid aggregation_method specified %s." % aggregation_method) out_grads = _GetGrads(grads, op) for i, out_grad in enumerate(out_grads): if loop_state: if isinstance(out_grad, (ops.Tensor, ops.IndexedSlices)): assert control_flow_util.IsLoopSwitch(op) continue # Grads have to be Tensors or IndexedSlices if (isinstance(out_grad, collections_abc.Sequence) and not all( isinstance(g, (ops.Tensor, ops.IndexedSlices)) for g in out_grad if g is not None)): raise TypeError("gradients have to be either all Tensors " "or all IndexedSlices") # Aggregate multiple gradients, and convert [] to None. if out_grad: if len(out_grad) < 2: used = "nop" out_grads[i] = out_grad[0] elif all( isinstance(g, ops.Tensor) for g in out_grad if g is not None): tensor_shape = _AccumulatorShape(out_grad) if aggregation_method in [ AggregationMethod.EXPERIMENTAL_TREE, AggregationMethod.EXPERIMENTAL_ACCUMULATE_N ]: # Aggregate all gradients by doing pairwise sums: this may # reduce performance, but it can improve memory because the # gradients can be released earlier. # # TODO(vrv): Consider replacing this with a version of # tf.AddN() that eagerly frees its inputs as soon as they are # ready, so the order of this tree does not become a problem. used = "tree" with ops.name_scope(op.name + "_gradient_sum"): running_sum = out_grad[0] for grad in out_grad[1:]: running_sum = math_ops.add_n([running_sum, grad]) out_grads[i] = running_sum else: used = "add_n" out_grads[i] = _MultiDeviceAddN(out_grad, gradient_uid) logging.vlog(2, " _AggregatedGrads %d x %s using %s", len(out_grad), tensor_shape, used) else: out_grads[i] = backprop.aggregate_indexed_slices_gradients( out_grad) # pylint: disable=protected-access else: # not out_grad # out_grads[i] is [], thus its aggregation is simply None. out_grads[i] = None return out_grads
def ZerosLike(self, op, index): """Create zeros_like for the specified output of an op. If op is in a while loop that is part of gradients(), this method must be called in its grad loop context. Args: op: A tensorflow operation. index: the index for a specific output of the op. Returns: A zero tensor of the same shape of op.outputs[index]. """ if util.IsLoopSwitch(op): return None if op.graph._building_function: # pylint: disable=protected-access # The optimization here is tricky to apply to functions return array_ops.zeros_like(op.outputs[index]) dead_branch = util.IsSwitch(op) forward_ctxt = util.GetWhileContext(op) grad_state = self._map.get(forward_ctxt) if grad_state is None: # op is not in a while loop that is part of gradients(). return ZerosLikeOutsideLoop(op, index) op_ctxt = op._get_control_flow_context() val = ops.convert_to_tensor(op.outputs[index], name="tensor") shape = val.get_shape() if shape.is_fully_defined(): # If the shape is known statically, just create a zero tensor with # the right shape in the grad loop context. result = constant_op.constant(0, shape=shape.dims, dtype=val.dtype) if dead_branch: # op is a cond switch. Guard the zero tensor with a switch. pred = grad_state.history_map.get(op_ctxt.pred.name) branch = op_ctxt.branch result = control_flow_ops._SwitchRefOrTensor(result, pred)[1 - branch] else: # Unknown shape so keep a history of the shape at runtime. if dead_branch: # Need to add a special switch to guard the value. pred = op_ctxt.pred branch = op_ctxt.branch op_ctxt.outer_context.Enter() val = control_flow_ops._SwitchRefOrTensor(op.inputs[0], pred)[1 - branch] zeros_shape = array_ops.shape_internal(val, optimize=False) op_ctxt.outer_context.Exit() val.op._set_control_flow_context(op_ctxt) zeros_shape.op._set_control_flow_context(op_ctxt) else: op_ctxt.Enter() zeros_shape = array_ops.shape_internal(val, optimize=False) op_ctxt.Exit() # Add forward accumulator for shape. grad_state.grad_context.Exit() history_zeros_shape = grad_state.AddForwardAccumulator( zeros_shape, dead_branch=dead_branch) grad_state.grad_context.Enter() # Create a zero tensor with the right shape. shape = grad_state.AddBackpropAccumulatedValue( history_zeros_shape, zeros_shape, dead_branch) result = array_ops.zeros(shape, val.dtype) return result