def raged_lists_batch_to_multihot(ragged_lists_batch: tf.RaggedTensor, multihot_dim: int) -> tf.Tensor:
    """Map a ragged batch of label-index lists to a dense batch of multi-hot vectors.

    Args:
        ragged_lists_batch: Ragged tensor where each row lists label indices.
        multihot_dim: Width of the multi-hot output dimension.

    Returns:
        Dense tensor of shape (batch, multihot_dim) with 1.0 at every listed index.
    """
    # TODO: Seems tf.one_hot supports ragged tensors, so try to remove to_tensor call
    # Pad rows with -1: tf.one_hot emits an all-zero row for out-of-range indices,
    # so padding contributes nothing to the final multi-hot vector.
    padded_indices = ragged_lists_batch.to_tensor(-1)
    per_label_one_hot = tf.one_hot(padded_indices, multihot_dim)
    # Collapse the per-label one-hot rows of each example into one multi-hot vector.
    return tf.reduce_max(per_label_one_hot, axis=1)
def unrag_tensor(x: tf.RaggedTensor, max_size: int, axis: int) -> tf.Tensor:
    """Converts a ragged tensor to a full tensor by padding to a maximum size.

    This function is useful for converting ragged tensors to a fixed size when one
    or more of the dimensions are of variable length.

    Args:
        x: Ragged tensor to convert.
        max_size: Maximum size of the axis to pad.
        axis: Axis of `x` to pad to `max_size`. This must specify ragged dimensions.
            If more than one axis is specified, `max_size` must be of the same length
            as `axis`.

    Returns:
        A padded version of `x`. Padding will use the equivalent of NaNs in the
        tensor's native dtype. This will replace the shape of the specified `axis`
        with `max_size`, leaving the remaining dimensions set to the bounding shape
        of the ragged tensor.
    """
    bounding_shape = x.bounding_shape()
    axis = tf.cast(axis, tf.int64)
    axis = axis % len(x.shape)  # Handle negative indices.
    axis = tf.reshape(axis, [-1, 1])  # Ensure (n, 1) shape for indexing.
    max_size = tf.cast(max_size, bounding_shape.dtype)
    max_size = tf.reshape(max_size, [-1])  # Ensure (n,) shape for indexing.
    # Overwrite the padded axes of the bounding shape with the requested sizes.
    shape = tf.tensor_scatter_nd_update(bounding_shape, axis, max_size)
    # Bug fix: `np.NaN` was removed in NumPy 2.0 — the supported spelling is `np.nan`.
    # NOTE(review): casting NaN into an integer dtype is implementation-defined —
    # presumably `x` is floating point here; confirm against callers.
    return x.to_tensor(default_value=tf.cast(np.nan, x.dtype), shape=shape)
def pad_sequence_right(sequences_batch: tf.RaggedTensor, mask: bool) -> tf.Tensor:
    """Right-pad a ragged batch of sequences with zeros to a fixed length.

    Args:
        sequences_batch: Ragged batch of index sequences.
        mask: When True, shift all indices up by one so that 0 is reserved
            as the padding value.

    Returns:
        Dense tensor of shape (batch, settings.settings.sequence_length),
        zero-padded on the right.
    """
    max_len = settings.settings.sequence_length
    # Truncate from the left: keep only the trailing max_len items of each sequence.
    truncated = sequences_batch[:, -max_len:]
    if mask:
        # Add one to indices, to reserve 0 index for padding
        truncated = truncated + 1
    # Densify, padding zeros on the right up to the fixed width.
    return truncated.to_tensor(0, shape=[None, max_len])
def __init__(
    self,
    num_states: int,
    data: tf.Tensor,
    indices: tf.Tensor,
    indptr: tf.Tensor,
    states: tf.RaggedTensor,
    ref_ids: tf.RaggedTensor,
    ref_ids_lookup: Dict[str, int],
) -> None:
    """Wrap the given tensors in non-trainable TF variables and build a
    static string-to-int lookup table for reference ids.

    NOTE(review): the data/indices/indptr trio resembles CSR sparse-matrix
    components — confirm against the class that owns this method.
    """
    # Non-trainable so these are stored state, not optimized parameters.
    self._num_states = tf.Variable([num_states], trainable=False)
    self._data = tf.Variable(data, trainable=False)
    self._indices = tf.Variable(indices, trainable=False)
    self._indptr = tf.Variable(indptr, trainable=False)
    # Ragged inputs are densified so they fit in plain variables.
    self._states = tf.Variable(states.to_tensor(), trainable=False)
    self._ref_ids = tf.Variable(ref_ids.to_tensor(), trainable=False)
    # Immutable hash table: ref-id string -> int; unmatched keys map to 0.
    lookup_keys = tf.convert_to_tensor(list(ref_ids_lookup.keys()), dtype=tf.string)
    lookup_values = tf.convert_to_tensor(list(ref_ids_lookup.values()), dtype=tf.int32)
    table_init = tf.lookup.KeyValueTensorInitializer(lookup_keys, lookup_values)
    self._ref_ids_lookup = tf.lookup.StaticHashTable(table_init, default_value=0)