def _is_in_xla_context():
  """Returns whether the current context is inside an XLA context."""
  outer_graph = ops.get_default_graph()
  # The `_control_flow_context` is not copied when building a FuncGraph so
  # we look it up from the base graph.
  while isinstance(outer_graph, func_graph_module.FuncGraph):
    outer_graph = outer_graph.outer_graph
  cur_ctxt = outer_graph._get_control_flow_context()  # pylint: disable=protected-access
  return control_flow_util.GetContainingXLAContext(cur_ctxt) is not None
def is_xla_compiled():
  """Whether we are building a graph that will be compiled by XLA.

  This checks whether the code is executing within an XLA context.

  If True, model authors should ensure the graph they build is compilable by
  XLA. Specifically, they should ensure that all ops have XLA implementations
  and that all shapes are statically known.

  Returns:
    bool, whether the current graph will be compiled for XLA.
  """
  ctxt = tf.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
  return control_flow_util.GetContainingXLAContext(ctxt) is not None
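# A hypothetical caller of `is_xla_compiled`, shown as a sketch only: XLA
# requires statically known shapes, so a model can branch on this check at
# graph-build time to pick a fixed-length code path. The function name and
# `max_len` parameter are illustrative, not from the snippet above.
import tensorflow as tf


def maybe_pad_to_static_length(tokens, max_len):
  """Pads a 1-D `tokens` tensor to `max_len` when building for XLA."""
  if is_xla_compiled():
    pad = max_len - tf.shape(tokens)[0]
    tokens = tf.pad(tokens, [[0, pad]])
    tokens.set_shape([max_len])  # make the length static for XLA
  return tokens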
# Context for `_is_in_xla_context` (defined above): while_v2 lowers its
# `While` op except inside an XLA context.
#
# Lowering allows while_v2 to avoid some of the limitations of Functions,
# allowing users to specify devices & colocation inside of while_v2 branches,
# and enabling non-strict evaluation & partial pruning of while_v2 branches.
# This brings while_v2 closer to feature parity with tf.while_loop.
#
# However, we do not lower `While` in the XLA context because it is easier
# for XLA to apply its own optimizations when dealing with un-lowered `While`
# operators than with low-level control flow primitives.
def dynamic_decode(
    decoder,
    output_time_major=False,
    impute_finished=False,
    maximum_iterations=None,
    parallel_iterations=32,
    swap_memory=False,
    scope=None,
):
    """Perform dynamic decoding with `decoder`.

    Calls initialize() once and step() repeatedly on the Decoder object.

    Args:
      decoder: A `Decoder` instance.
      output_time_major: Python boolean. Default: `False` (batch major). If
        `True`, outputs are returned as time major tensors (this mode is
        faster). Otherwise, outputs are returned as batch major tensors (this
        adds extra time to the computation).
      impute_finished: Python boolean. If `True`, then states for batch
        entries which are marked as finished get copied through and the
        corresponding outputs get zeroed out. This causes some slowdown at
        each time step, but ensures that the final state and outputs have the
        correct values and that backprop ignores time steps that were marked
        as finished.
      maximum_iterations: `int32` scalar, maximum allowed number of decoding
        steps. Default is `None` (decode until the decoder is fully done).
      parallel_iterations: Argument passed to `tf.while_loop`.
      swap_memory: Argument passed to `tf.while_loop`.
      scope: Optional variable scope to use.

    Returns:
      `(final_outputs, final_state, final_sequence_lengths)`.

    Raises:
      TypeError: if `decoder` is not an instance of `Decoder`.
      ValueError: if `maximum_iterations` is provided but is not a scalar.
    """
    if not isinstance(decoder, Decoder):
        raise TypeError(
            "Expected decoder to be type Decoder, but saw: %s" % type(decoder)
        )

    with variable_scope.variable_scope(scope, "decoder") as varscope:
        # Determine context types.
        ctxt = (
            ops.get_default_graph()._get_control_flow_context()
        )  # pylint: disable=protected-access
        is_xla = control_flow_util.GetContainingXLAContext(ctxt) is not None
        in_while_loop = (
            control_flow_util.GetContainingWhileContext(ctxt) is not None
        )
        # Properly cache variable values inside the while_loop.
        # Don't set a caching device when running in a loop, since it is
        # possible that train steps could be wrapped in a tf.while_loop. In
        # that scenario caching prevents forward computations in loop
        # iterations from re-reading the updated weights.
        if not context.executing_eagerly() and not in_while_loop:
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        if maximum_iterations is not None:
            maximum_iterations = ops.convert_to_tensor(
                maximum_iterations, dtype=dtypes.int32, name="maximum_iterations"
            )
            if maximum_iterations.get_shape().ndims != 0:
                raise ValueError("maximum_iterations must be a scalar")

        initial_finished, initial_inputs, initial_state = decoder.initialize()

        zero_outputs = _create_zero_outputs(
            decoder.output_size, decoder.output_dtype, decoder.batch_size
        )

        if is_xla and maximum_iterations is None:
            raise ValueError("maximum_iterations is required for XLA compilation.")
        if maximum_iterations is not None:
            initial_finished = math_ops.logical_or(
                initial_finished, 0 >= maximum_iterations
            )
        initial_sequence_lengths = array_ops.zeros_like(
            initial_finished, dtype=dtypes.int32
        )
        initial_time = constant_op.constant(0, dtype=dtypes.int32)

        def _shape(batch_size, from_shape):
            if (not isinstance(from_shape, tensor_shape.TensorShape)
                    or from_shape.ndims == 0):
                return tensor_shape.TensorShape(None)
            else:
                batch_size = tensor_util.constant_value(
                    ops.convert_to_tensor(batch_size, name="batch_size")
                )
                return tensor_shape.TensorShape([batch_size]).concatenate(from_shape)

        dynamic_size = maximum_iterations is None or not is_xla

        def _create_ta(s, d):
            return tensor_array_ops.TensorArray(
                dtype=d,
                size=0 if dynamic_size else maximum_iterations,
                dynamic_size=dynamic_size,
                element_shape=_shape(decoder.batch_size, s),
            )

        initial_outputs_ta = nest.map_structure(
            _create_ta, decoder.output_size, decoder.output_dtype
        )

        def condition(
            unused_time,
            unused_outputs_ta,
            unused_state,
            unused_inputs,
            finished,
            unused_sequence_lengths,
        ):
            return math_ops.logical_not(math_ops.reduce_all(finished))

        def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
            """Internal while_loop body.

            Args:
              time: scalar int32 tensor.
              outputs_ta: structure of TensorArray.
              state: (structure of) state tensors and TensorArrays.
              inputs: (structure of) input tensors.
              finished: bool tensor (keeping track of what's finished).
              sequence_lengths: int32 tensor (keeping track of time of finish).

            Returns:
              `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
                next_sequence_lengths)`.
            """
            (next_outputs, decoder_state, next_inputs, decoder_finished) = decoder.step(
                time, inputs, state
            )
            if decoder.tracks_own_finished:
                next_finished = decoder_finished
            else:
                next_finished = math_ops.logical_or(decoder_finished, finished)
            next_sequence_lengths = array_ops.where(
                math_ops.logical_not(finished),
                array_ops.fill(array_ops.shape(sequence_lengths), time + 1),
                sequence_lengths,
            )

            nest.assert_same_structure(state, decoder_state)
            nest.assert_same_structure(outputs_ta, next_outputs)
            nest.assert_same_structure(inputs, next_inputs)

            # Zero out output values past finish
            if impute_finished:
                emit = nest.map_structure(
                    lambda out, zero: array_ops.where(finished, zero, out),
                    next_outputs,
                    zero_outputs,
                )
            else:
                emit = next_outputs

            # Copy through states past finish
            def _maybe_copy_state(new, cur):
                # TensorArrays and scalar states get passed through.
                if isinstance(cur, tensor_array_ops.TensorArray):
                    pass_through = True
                else:
                    new.set_shape(cur.shape)
                    pass_through = new.shape.ndims == 0
                return new if pass_through else array_ops.where(finished, cur, new)

            if impute_finished:
                next_state = nest.map_structure(_maybe_copy_state, decoder_state, state)
            else:
                next_state = decoder_state

            outputs_ta = nest.map_structure(
                lambda ta, out: ta.write(time, out), outputs_ta, emit
            )
            return (
                time + 1,
                outputs_ta,
                next_state,
                next_inputs,
                next_finished,
                next_sequence_lengths,
            )

        res = control_flow_ops.while_loop(
            condition,
            body,
            loop_vars=(
                initial_time,
                initial_outputs_ta,
                initial_state,
                initial_inputs,
                initial_finished,
                initial_sequence_lengths,
            ),
            parallel_iterations=parallel_iterations,
            maximum_iterations=maximum_iterations,
            swap_memory=swap_memory,
        )

        final_outputs_ta = res[1]
        final_state = res[2]
        final_sequence_lengths = res[5]

        final_outputs = nest.map_structure(lambda ta: ta.stack(), final_outputs_ta)

        try:
            final_outputs, final_state = decoder.finalize(
                final_outputs, final_state, final_sequence_lengths
            )
        except NotImplementedError:
            pass

        if not output_time_major:
            final_outputs = nest.map_structure(_transpose_batch_time, final_outputs)

    return final_outputs, final_state, final_sequence_lengths
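# A minimal sketch of the `Decoder` contract that `dynamic_decode` drives:
# `initialize()` returns `(finished, first_inputs, initial_state)` and
# `step(time, inputs, state)` returns `(outputs, next_state, next_inputs,
# finished)`. `CountingDecoder` is a hypothetical toy decoder, not part of
# the library; it emits an incrementing value for `num_steps` steps.
from tensorflow.python.framework import constant_op, dtypes, tensor_shape
from tensorflow.python.ops import array_ops


class CountingDecoder(Decoder):

    def __init__(self, batch_size, num_steps):
        self._batch_size = batch_size
        self._num_steps = num_steps

    @property
    def batch_size(self):
        return constant_op.constant(self._batch_size)

    @property
    def output_size(self):
        return tensor_shape.TensorShape([])  # one scalar per batch entry

    @property
    def output_dtype(self):
        return dtypes.float32

    def initialize(self, name=None):
        finished = array_ops.zeros([self._batch_size], dtype=dtypes.bool)
        first_inputs = array_ops.zeros([self._batch_size], dtype=dtypes.float32)
        initial_state = constant_op.constant(0.0)  # dummy state, unused
        return finished, first_inputs, initial_state

    def step(self, time, inputs, state, name=None):
        outputs = inputs + 1.0  # count up once per decoding step
        finished = array_ops.fill([self._batch_size], time + 1 >= self._num_steps)
        # Feed the outputs back in as the next inputs.
        return outputs, state, outputs, finished


# Usage: decode 5 steps for a batch of 2, then stack outputs batch-major.
# outputs, state, lengths = dynamic_decode(CountingDecoder(2, 5))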
def dynamic_decode(decoder,
                   output_time_major: bool = False,
                   impute_finished: bool = False,
                   maximum_iterations=None,
                   parallel_iterations: int = 32,
                   swap_memory: bool = False,
                   training=None,
                   scope=None,
                   **kwargs):
    """Perform dynamic decoding with `decoder`.

    Calls initialize() once and step() repeatedly on the Decoder object.

    Args:
      decoder: A `Decoder` instance.
      output_time_major: Python boolean. Default: `False` (batch major). If
        `True`, outputs are returned as time major tensors (this mode is
        faster). Otherwise, outputs are returned as batch major tensors (this
        adds extra time to the computation).
      impute_finished: Python boolean. If `True`, then states for batch
        entries which are marked as finished get copied through and the
        corresponding outputs get zeroed out. This causes some slowdown at
        each time step, but ensures that the final state and outputs have the
        correct values and that backprop ignores time steps that were marked
        as finished.
      maximum_iterations: A strictly positive `int32` scalar, the maximum
        allowed number of decoding steps. Default is `None` (decode until the
        decoder is fully done).
      parallel_iterations: Argument passed to `tf.while_loop`.
      swap_memory: Argument passed to `tf.while_loop`.
      training: Python boolean. Indicates whether the layer should behave in
        training mode or in inference mode. Only relevant when `dropout` or
        `recurrent_dropout` is used.
      scope: Optional name scope to use.
      **kwargs: dict, other keyword arguments for dynamic_decode. It might
        contain arguments for `BaseDecoder` to initialize, which takes all
        tensor inputs during call().

    Returns:
      `(final_outputs, final_state, final_sequence_lengths)`.

    Raises:
      ValueError: if `maximum_iterations` is provided but is not a scalar.
    """
    with variable_scope.variable_scope(scope, 'decoder') as varscope:
        ctxt = ops.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
        in_while_loop = (
            control_flow_util.GetContainingWhileContext(ctxt) is not None)
        if not context.executing_eagerly() and not in_while_loop:
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        is_xla = not tf.executing_eagerly(
        ) and control_flow_util.GraphOrParentsInXlaContext(
            tf.compat.v1.get_default_graph())

        if maximum_iterations is not None:
            maximum_iterations = tf.convert_to_tensor(
                maximum_iterations,
                dtype=tf.int32,
                name='maximum_iterations',
            )
            if maximum_iterations.shape.ndims != 0:
                raise ValueError('maximum_iterations must be a scalar')
            tf.debugging.assert_greater(
                maximum_iterations,
                0,
                message='maximum_iterations should be greater than 0',
            )
        elif is_xla:
            raise ValueError(
                'maximum_iterations is required for XLA compilation.')

        if isinstance(decoder, Decoder):
            initial_finished, initial_inputs, initial_state = (
                decoder.initialize())
        else:
            # For BaseDecoder that takes tensor inputs during call.
            decoder_init_input = kwargs.pop('decoder_init_input', None)
            decoder_init_kwargs = kwargs.pop('decoder_init_kwargs', {})
            initial_finished, initial_inputs, initial_state = decoder.initialize(
                decoder_init_input, **decoder_init_kwargs)

        zero_outputs = tf.nest.map_structure(
            lambda shape, dtype: tf.zeros(
                _prepend_batch(decoder.batch_size, shape), dtype=dtype),
            decoder.output_size,
            decoder.output_dtype,
        )

        if maximum_iterations is not None:
            initial_finished = tf.logical_or(initial_finished,
                                             0 >= maximum_iterations)
        initial_sequence_lengths = tf.zeros_like(initial_finished,
                                                 dtype=tf.int32)
        initial_time = tf.constant(0, dtype=tf.int32)

        def _shape(batch_size, from_shape):
            if (not isinstance(from_shape, tf.TensorShape)
                    or from_shape.ndims == 0):
                return None
            else:
                batch_size = tf.get_static_value(
                    tf.convert_to_tensor(batch_size, name='batch_size'))
                return tf.TensorShape([batch_size]).concatenate(from_shape)

        dynamic_size = maximum_iterations is None or not is_xla

        def _create_ta(s, d):
            return tf.TensorArray(
                dtype=d,
                size=0 if dynamic_size else maximum_iterations,
                dynamic_size=dynamic_size,
                element_shape=_shape(decoder.batch_size, s),
            )

        initial_outputs_ta = tf.nest.map_structure(_create_ta,
                                                   decoder.output_size,
                                                   decoder.output_dtype)

        def condition(
            unused_time,
            unused_outputs_ta,
            unused_state,
            unused_inputs,
            finished,
            unused_sequence_lengths,
        ):
            return tf.logical_not(tf.reduce_all(finished))

        def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
            """Internal while_loop body.

            Args:
              time: scalar int32 tensor.
              outputs_ta: structure of TensorArray.
              state: (structure of) state tensors and TensorArrays.
              inputs: (structure of) input tensors.
              finished: bool tensor (keeping track of what's finished).
              sequence_lengths: int32 tensor (keeping track of time of finish).

            Returns:
              `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
                next_sequence_lengths)`.
            """
            (
                next_outputs,
                decoder_state,
                next_inputs,
                decoder_finished,
            ) = decoder.step(time, inputs, state, training)

            decoder_state_sequence_lengths = False
            if decoder.tracks_own_finished:
                next_finished = decoder_finished
                lengths = getattr(decoder_state, 'lengths', None)
                if lengths is not None:
                    # Sequence lengths are provided by decoder_state.lengths;
                    # overwrite our sequence lengths.
                    decoder_state_sequence_lengths = True
                    sequence_lengths = tf.cast(lengths, tf.int32)
            else:
                next_finished = tf.logical_or(decoder_finished, finished)

            if decoder_state_sequence_lengths:
                # Just pass something through the loop; at the next iteration
                # we'll pull the sequence lengths from the decoder_state
                # again.
                next_sequence_lengths = sequence_lengths
            else:
                next_sequence_lengths = tf.where(
                    tf.logical_not(finished),
                    tf.fill(tf.shape(sequence_lengths), time + 1),
                    sequence_lengths,
                )

            tf.nest.assert_same_structure(state, decoder_state)
            tf.nest.assert_same_structure(outputs_ta, next_outputs)
            tf.nest.assert_same_structure(inputs, next_inputs)

            # Zero out output values past finish
            if impute_finished:

                def zero_out_finished(out, zero):
                    if finished.shape.rank < zero.shape.rank:
                        broadcast_finished = tf.broadcast_to(
                            tf.expand_dims(finished, axis=-1), zero.shape)
                        return tf.where(broadcast_finished, zero, out)
                    else:
                        return tf.where(finished, zero, out)

                emit = tf.nest.map_structure(zero_out_finished, next_outputs,
                                             zero_outputs)
            else:
                emit = next_outputs

            # Copy through states past finish
            def _maybe_copy_state(new, cur):
                # TensorArrays and scalar states get passed through.
                if isinstance(cur, tf.TensorArray):
                    pass_through = True
                else:
                    new.set_shape(cur.shape)
                    pass_through = new.shape.ndims == 0
                if not pass_through:
                    broadcast_finished = tf.broadcast_to(
                        tf.expand_dims(finished, axis=-1), new.shape)
                    return tf.where(broadcast_finished, cur, new)
                else:
                    return new

            if impute_finished:
                next_state = tf.nest.map_structure(_maybe_copy_state,
                                                   decoder_state, state)
            else:
                next_state = decoder_state

            outputs_ta = tf.nest.map_structure(
                lambda ta, out: ta.write(time, out), outputs_ta, emit)
            return (
                time + 1,
                outputs_ta,
                next_state,
                next_inputs,
                next_finished,
                next_sequence_lengths,
            )

        res = tf.while_loop(
            condition,
            body,
            loop_vars=(
                initial_time,
                initial_outputs_ta,
                initial_state,
                initial_inputs,
                initial_finished,
                initial_sequence_lengths,
            ),
            parallel_iterations=parallel_iterations,
            maximum_iterations=maximum_iterations,
            swap_memory=swap_memory,
        )

        final_outputs_ta = res[1]
        final_state = res[2]
        final_sequence_lengths = res[5]

        final_outputs = tf.nest.map_structure(lambda ta: ta.stack(),
                                              final_outputs_ta)

        try:
            final_outputs, final_state = decoder.finalize(
                final_outputs, final_state, final_sequence_lengths)
        except NotImplementedError:
            pass

        if not output_time_major:
            final_outputs = tf.nest.map_structure(_transpose_batch_time,
                                                  final_outputs)

    return final_outputs, final_state, final_sequence_lengths
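# `_prepend_batch` is referenced above but not defined in this excerpt. A
# minimal sketch of what it needs to do, assuming TF 2.x APIs: prepend the
# batch dimension to a per-step output shape, returning a static shape when
# `batch_size` is statically known and a dynamic one otherwise.
def _prepend_batch(batch_size, shape):
    """Prepends the batch dimension to `shape`."""
    if isinstance(batch_size, tf.Tensor):
        static_batch_size = tf.get_static_value(batch_size)
    else:
        static_batch_size = batch_size
    if static_batch_size is None:
        # Batch size only known at runtime: build the shape as a tensor.
        return tf.concat(([batch_size], shape), axis=0)
    return [static_batch_size] + list(shape)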
def Decoder_Dynamic_Decode(
    decoder,
    output_time_major=False,
    impute_finished=False,
    maximum_iterations=None,
    parallel_iterations=32,
    swap_memory=False,
    scope=None,
):
    if not isinstance(decoder, Decoder):
        raise TypeError(
            "Expected decoder to be type Decoder, but saw: %s" % type(decoder)
        )

    with variable_scope.variable_scope(scope, "decoder") as varscope:
        ctxt = tf.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
        is_xla = control_flow_util.GetContainingXLAContext(ctxt) is not None
        in_while_loop = (
            control_flow_util.GetContainingWhileContext(ctxt) is not None
        )
        if not context.executing_eagerly() and not in_while_loop:
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        if maximum_iterations is not None:
            maximum_iterations = tf.convert_to_tensor(
                maximum_iterations, dtype=tf.int32, name="maximum_iterations"
            )
            if maximum_iterations.get_shape().ndims != 0:
                raise ValueError("maximum_iterations must be a scalar")
        elif is_xla:
            raise ValueError("maximum_iterations is required for XLA compilation.")

        initial_finished, initial_inputs, initial_state = decoder.initialize()
        if maximum_iterations is not None:
            initial_finished = tf.logical_or(initial_finished, 0 >= maximum_iterations)
        initial_sequence_lengths = tf.zeros_like(initial_finished, dtype=tf.int32)
        initial_time = tf.constant(0, dtype=tf.int32)

        def _shape(batch_size, from_shape):
            if not isinstance(from_shape, TensorShape) or from_shape.ndims == 0:
                return TensorShape(None)
            else:
                batch_size = tensor_util.constant_value(
                    tf.convert_to_tensor(batch_size, name="batch_size")
                )
                return TensorShape([batch_size]).concatenate(from_shape)

        dynamic_size = maximum_iterations is None or not is_xla

        def _create_ta(s, d):
            return tf.TensorArray(
                dtype=d,
                size=0 if dynamic_size else maximum_iterations,
                dynamic_size=dynamic_size,
                element_shape=_shape(decoder.batch_size, s),
            )

        initial_outputs_ta = nest.map_structure(
            _create_ta, decoder.output_size, decoder.output_dtype
        )

        def condition(
            unused_time,
            unused_outputs_ta,
            unused_state,
            unused_inputs,
            finished,
            unused_sequence_lengths,
        ):
            return tf.logical_not(tf.reduce_all(finished))

        def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
            next_outputs, next_state, next_inputs, decoder_finished = decoder.step(
                time, inputs, state
            )
            if decoder.tracks_own_finished:
                next_finished = decoder_finished
            else:
                next_finished = tf.logical_or(decoder_finished, finished)
            # Reshape because 1) the helper's cond produces a merged tensor,
            # and 2) at inference time the value comes out 2-D.
            next_finished = tf.reshape(next_finished, [-1])
            next_sequence_lengths = tf.where(
                tf.logical_not(finished),
                x=tf.fill(tf.shape(sequence_lengths), time + 1),
                y=sequence_lengths,
            )

            nest.assert_same_structure(state, next_state)
            nest.assert_same_structure(outputs_ta, next_outputs)
            nest.assert_same_structure(inputs, next_inputs)

            def _maybe_copy_state(new, cur):
                if isinstance(cur, tf.TensorArray):
                    pass_through = True
                else:
                    new.set_shape(cur.shape)
                    pass_through = new.shape.ndims == 0
                return new if pass_through else tf.where(finished, cur, new)

            if impute_finished:
                # `_replace` returns a new namedtuple; the result must be
                # reassigned.
                new_linear = nest.map_structure(
                    lambda out, zero: tf.where(finished, zero, out),
                    next_outputs.linear,
                    tf.zeros_like(next_outputs.linear),
                )
                next_outputs = next_outputs._replace(linear=new_linear)
                next_state = nest.map_structure(_maybe_copy_state, next_state, state)

            outputs_ta = nest.map_structure(
                lambda ta, out: ta.write(time, out), outputs_ta, next_outputs
            )
            return (
                time + 1,
                outputs_ta,
                next_state,
                next_inputs,
                next_finished,
                next_sequence_lengths,
            )

        res = tf.while_loop(
            cond=condition,
            body=body,
            loop_vars=[
                initial_time,
                initial_outputs_ta,
                initial_state,
                initial_inputs,
                initial_finished,
                initial_sequence_lengths,
            ],
            parallel_iterations=parallel_iterations,
            maximum_iterations=maximum_iterations,
            swap_memory=swap_memory,
        )

        final_outputs_ta, final_state, final_sequence_lengths = res[1], res[2], res[5]
        final_outputs = nest.map_structure(lambda ta: ta.stack(), final_outputs_ta)

        try:
            final_outputs, final_state = decoder.finalize(
                final_outputs, final_state, final_sequence_lengths
            )
        except NotImplementedError:
            pass

        if not output_time_major:
            final_outputs = nest.map_structure(rnn._transpose_batch_time, final_outputs)

    return final_outputs, final_state, final_sequence_lengths
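# `rnn._transpose_batch_time` (and the `_transpose_batch_time` used by the
# earlier variants) is not defined in this excerpt. A minimal sketch of what
# the callers need: swap the leading batch and time axes, leaving any
# trailing axes in place.
def _transpose_batch_time(x):
    """Transposes the batch and time dimensions of tensor `x`."""
    if x.shape.ndims is not None and x.shape.ndims < 2:
        return x  # nothing to swap for scalars and vectors
    perm = tf.concat(([1, 0], tf.range(2, tf.rank(x))), axis=0)
    return tf.transpose(x, perm)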
def _is_on_tpu():
  ctxt = framework_ops.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
  return control_flow_util.GetContainingXLAContext(ctxt) is not None
def _is_in_xla_context():
  """Returns whether the current context is inside an XLA context."""
  cur_ctxt = ops.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
  return control_flow_util.GetContainingXLAContext(cur_ctxt) is not None