def _get_sequence_dense_tensor(
    self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('Export saved model does not support MOD '
                                'sharded embeddings.')

    def host_computation():
      return fc._EmbeddingColumn._get_sequence_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('TPUEmbedding on CPU does not support MOD '
                                'sharded embeddings.')
    return fc._EmbeddingColumn._get_sequence_dense_tensor(
        self, inputs, weight_collections, trainable)

  tensor = inputs.get(self.get_feature_key_name())
  tensor_lengths = inputs.get(self.get_sequence_length_feature_key_name())

  # inputs is a _LazyBuilder and for rank 1 tensors, it calls expand_dims(-1).
  # We need to undo this to match the standard CPU sequence embedding.
  tensor_lengths = array_ops.squeeze(tensor_lengths, -1)

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(self.get_embedding_var_name(),
                                  'embedding_weights')

  return fc._SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=tensor, sequence_length=tensor_lengths)
def _get_sequence_dense_tensor(
    self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('Export saved model does not support MOD '
                                'sharded embeddings.')

    def host_computation():
      return fc._SharedEmbeddingColumn._get_sequence_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('TPUEmbedding on CPU does not support MOD '
                                'sharded embeddings.')
    return fc._SharedEmbeddingColumn._get_sequence_dense_tensor(
        self, inputs, weight_collections, trainable)

  tensor = inputs.get(self.get_feature_key_name())
  tensor_lengths = inputs.get(self.get_sequence_length_feature_key_name())

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(
      self.get_embedding_var_name(),
      'embedding_weights',
      is_shared_embedding=True)

  return fc._SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=tensor, sequence_length=tensor_lengths)
def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('Export saved model does not support MOD '
                                'sharded embeddings.')

    def host_computation():
      return fc._EmbeddingColumn._get_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('TPUEmbedding on CPU does not support MOD '
                                'sharded embeddings.')
    return fc._EmbeddingColumn._get_dense_tensor(
        self, inputs, weight_collections, trainable)

  # TPU mode
  # Get the embeddings from the LazyBuilder.
  tensor = inputs.get(self.get_feature_key_name())

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(self.get_embedding_var_name(),
                                  'embedding_weights')

  return tensor
def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():

    def host_computation():
      return fc._EmbeddingColumn._get_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    return fc._EmbeddingColumn._get_dense_tensor(
        self, inputs, weight_collections, trainable)

  # TPU mode
  # Get the embeddings from the LazyBuilder.
  tensor = inputs.get(self.get_feature_key_name())

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(self.get_embedding_var_name(),
                                  'embedding_weights')

  return tensor
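# A minimal, hedged usage sketch (not from the source): the feature key
# 'watched' and the bucket/dimension sizes are illustrative assumptions. It
# shows how a TPU embedding column of the kind whose _get_dense_tensor paths
# appear above is typically constructed; on CPU or during saved-model export
# the column falls back to fc._EmbeddingColumn, while on TPU it reads the
# already-activated embedding from the LazyBuilder.
import tensorflow.compat.v1 as tf

watched = tf.feature_column.categorical_column_with_identity(
    key='watched', num_buckets=1000)
watched_embedding = tf.tpu.experimental.embedding_column(
    watched, dimension=16, combiner='mean')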
def computation(x):
  return tpu.outside_compilation(computation_with_string_ops, x)
def computation(x):
  w = tpu.outside_compilation(host_computation, x)
  y = w + 1.0
  z = tpu.outside_compilation(host_computation, y)
  return z + 5.0
def computation(x):
  x = x + 1.0
  y = tpu.outside_compilation(host_computation, x)
  y = tpu.outside_compilation(host_computation, x)
  return y + 1.0
def tpu_fn(x):
  x2 = x + 5.0
  while x2 < 50.0:
    x2 = tpu.outside_compilation(outside_fn, x2)
  return x2 + 4.0
def tpu_fn(x):
  x2 = x + 5.0
  if x < 50.0:
    return tpu.outside_compilation(outside_fn, x2)
  else:
    return x2
def tpu_fn(x):
  x2 = x + 5.0
  output1 = tpu.outside_compilation(outside_fn1, x2)
  x3 = output1 + 3.0
  output2 = tpu.outside_compilation(outside_fn2, x3)
  return output2
def train_step(x):
  x2 = x + 5.0
  logging_ops.print_v2(x2)
  x2 = tpu.outside_compilation(host_computation, x2)
  return x2 + 4.0
def tpu_function(sparse):
  lookup = tpu.outside_compilation(
      embedding_ops.safe_embedding_lookup_sparse, table, sparse)
  return math_ops.reduce_sum(lookup, axis=0)
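# A hedged sketch of the inputs tpu_function above assumes (shapes and ids are
# illustrative only): a dense embedding table and a rank-2 SparseTensor of
# int64 ids, which safe_embedding_lookup_sparse combines per row on the host
# via the outside_compilation call.
import tensorflow as tf

table = tf.Variable(tf.random.uniform([100, 8]))
sparse = tf.sparse.SparseTensor(
    indices=[[0, 0], [0, 1], [1, 0]],
    values=tf.constant([3, 7, 12], dtype=tf.int64),
    dense_shape=[2, 4])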
def _embedding_lookup_for_ragged_tensor(
    self, inp: ragged_tensor.RaggedTensor,
    weight: Optional[ragged_tensor.RaggedTensor],
    table: tf_variables.Variable,
    feature: tpu_embedding_v2_utils.FeatureConfig) -> ops.Tensor:
  """Embedding lookup for ragged tensor based on its feature config.

  Args:
    inp: a single rank 2 RaggedTensor input.
    weight: None or a RaggedTensor with the same shape as the input.
    table: a table variable.
    feature: a feature config.

  Returns:
    Embedding lookup result.

  Raises:
    ValueError: if the input ragged tensor is not rank 2, or the output shape
      set in the feature config doesn't match the first dim size of the input.
  """
  if inp.shape.rank != 2:
    raise ValueError(
        "Only rank 2 ragged tensor is supported, but got rank {}".format(
            inp.shape.rank))
  batch_size = inp.shape[0]

  # This computation needs to be placed outside of the TPU, as the size of the
  # row splits and values can change from batch to batch, which would cause
  # the program to re-compile.
  def ragged_to_dense_outside_compilation(inp, weight, batch_size, feature):
    if weight is None:
      weight = ragged_tensor.RaggedTensor.from_row_splits(
          array_ops.ones_like(inp.values, dtype=dtypes.float32),
          inp.row_splits)
    if not feature.output_shape and feature.max_sequence_length > 0:
      inp = inp.to_tensor(shape=(batch_size, feature.max_sequence_length))
      # Ignore weight if it is a sequence feature.
      weight = array_ops.ones_like(inp, dtype=dtypes.float32)
    elif feature.output_shape:
      # Eagerly run the following op, as the result has to be a number in
      # order to use it as part of the output shape.
      with ops.init_scope():
        output_batch_size = math_ops.reduce_prod(feature.output_shape).numpy()
      # If the output batch size matches the data batch size, treat it as
      # normal ragged input.
      if output_batch_size == batch_size:
        inp, weight = inp.to_tensor(), weight.to_tensor()
      # If the data batch size is a factor of the output batch size, the
      # division result is the sequence length. Ignore the weights and
      # combiner.
      elif (output_batch_size > batch_size and
            output_batch_size % batch_size == 0):
        # Pad or truncate in the sequence dimension.
        seq_length = output_batch_size // batch_size
        inp = inp.to_tensor(shape=(batch_size, seq_length))
        # Ignore weight if it is a sequence feature.
        weight = array_ops.ones_like(inp, dtype=dtypes.float32)
      else:
        raise ValueError(
            "The batch size from the output shape set in the FeatureConfig "
            "should be a multiple of the input data batch size. But instead "
            "got output shape {}, input data batch size {}".format(
                feature.output_shape, batch_size))
    else:
      inp, weight = inp.to_tensor(), weight.to_tensor()

    return inp, weight

  inp, weight = tpu.outside_compilation(
      ragged_to_dense_outside_compilation,
      inp=inp,
      weight=weight,
      batch_size=batch_size,
      feature=feature)

  embeddings = embedding_ops.embedding_lookup_v2(table, inp)
  weight = array_ops.expand_dims(weight, -1)
  embeddings *= weight

  if feature.output_shape:
    with ops.init_scope():
      output_batch_size = math_ops.reduce_prod(feature.output_shape).numpy()
    if output_batch_size == batch_size:
      embeddings = self._apply_combiner_to_embeddings(embeddings, weight,
                                                      feature.table.combiner)
    embeddings = array_ops.reshape(
        embeddings, shape=feature.output_shape + [feature.table.dim])
  else:
    if feature.max_sequence_length == 0:
      embeddings = self._apply_combiner_to_embeddings(embeddings, weight,
                                                      feature.table.combiner)
  return embeddings
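# A hedged example (values are illustrative) of the rank-2 RaggedTensor input
# that _embedding_lookup_for_ragged_tensor expects: one variable-length row of
# ids per example, with the batch size statically known as inp.shape[0].
import tensorflow as tf

inp = tf.ragged.constant([[0, 3], [2], [1, 4, 5]], dtype=tf.int64)
assert inp.shape.rank == 2  # batch_size = 3, ragged second dimension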
def assign_add():
  v.assign_add(2.0)
  tpu.outside_compilation(assign_fn)
  v.assign_add(3.0)
def fn(x):
  y = x + 1
  z = tpu.outside_compilation(host_inc, y)
  a = z + 1
  return a
def tpu_fn(x):
  x2 = x + 5.0
  tpu.outside_compilation(outside_fn, x2)
  return x2 + 5.0
def tpu_fn(x):
  x2 = x + 5.0
  output = tpu.outside_compilation(outside_fn, x2)
  return output
def tpu_fn(x, y):
  a = x + 7.0
  b = y * 2.0
  c, d, e = tpu.outside_compilation(outside_fn, a, b)
  return (math_ops.reduce_max(c) + math_ops.reduce_min(d) +
          math_ops.reduce_sum(e))
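# A hedged sketch (the TPU cluster setup is assumed, not from the source) of
# how tpu_fn examples like the one above are typically driven: strategy.run
# compiles the body for TPU, and each tpu.outside_compilation call inside it
# is lowered to a callback that executes on the host CPU.
import tensorflow as tf

resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)

@tf.function
def run(x):
  return strategy.run(tpu_fn, args=(x, x))

per_replica = run(tf.constant(1.0))
results = strategy.experimental_local_results(per_replica)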