def _get_sequence_dense_tensor(
    self, inputs, weight_collections=None, trainable=None):
  # Do nothing with weight_collections and trainable since no variables are
  # created in this function.
  del weight_collections
  del trainable
  sp_tensor = inputs.get(self)
  dense_tensor = sparse_ops.sparse_tensor_to_dense(
      sp_tensor, default_value=self.default_value)
  # Reshape into [batch_size, T, variable_shape].
  dense_shape = array_ops.concat(
      [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape],
      axis=0)
  dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)

  # Get the number of timesteps per example.
  # For the 2D case, the raw values are grouped according to num_elements;
  # for the 3D case, the grouping happens in the third dimension, and
  # sequence length is not affected.
  num_elements = (self._variable_shape.num_elements()
                  if sp_tensor.shape.ndims == 2 else 1)
  seq_length = fc_utils.sequence_length_from_sparse_tensor(
      sp_tensor, num_elements=num_elements)

  return fc._SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=dense_tensor, sequence_length=seq_length)
def get_sequence_dense_tensor(self, transformation_cache, state_manager):
  """Returns a `TensorSequenceLengthPair`.

  Args:
    transformation_cache: A `FeatureTransformationCache` object to access
      features.
    state_manager: A `StateManager` to create / access resources such as
      lookup tables.
  """
  sp_tensor = transformation_cache.get(self, state_manager)
  dense_tensor = sparse_ops.sparse_tensor_to_dense(
      sp_tensor, default_value=self.default_value)
  # Reshape into [batch_size, T, variable_shape].
  dense_shape = array_ops.concat(
      [array_ops.shape(dense_tensor)[:1], [-1], self.variable_shape],
      axis=0)
  dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)

  # Get the number of timesteps per example.
  # For the 2D case, the raw values are grouped according to num_elements;
  # for the 3D case, the grouping happens in the third dimension, and
  # sequence length is not affected.
  if sp_tensor.shape.ndims == 2:
    num_elements = self.variable_shape.num_elements()
  else:
    num_elements = 1
  seq_length = fc_utils.sequence_length_from_sparse_tensor(
      sp_tensor, num_elements=num_elements)

  return fc.SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=dense_tensor, sequence_length=seq_length)
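# A minimal runnable sketch (not part of the column code above) showing how
# `num_elements` changes the computed sequence length: with raw values
# flattened two-per-timestep, a row holding 4 values counts as 2 timesteps.
# The internal module path below is an assumption matching recent TF 1.x/2.x.
import tensorflow.compat.v1 as tf
from tensorflow.python.feature_column import utils as fc_utils

with tf.Graph().as_default():
  # Example 0 stores 2 timesteps x 2 values; example 1 stores 1 timestep.
  sp = tf.SparseTensor(
      indices=[[0, 0], [0, 1], [0, 2], [0, 3], [1, 0], [1, 1]],
      values=[1., 2., 3., 4., 5., 6.],
      dense_shape=[2, 4])
  seq_len = fc_utils.sequence_length_from_sparse_tensor(sp, num_elements=2)
  with tf.Session() as sess:
    print(sess.run(seq_len))  # expected: [2 1]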
def split_inputs(ctx, features, labels, num_cores_per_batch=1):
  """Splits the dense and sparse tensors inside the features and labels."""
  enqueue_datas = collections.OrderedDict()
  if ctx.embedding_config:
    tpu_embedding_ = ctx.embedding_config.tpu_embedding
    for feature_key in tpu_embedding_.feature_to_config_dict:
      sparse_feature = _get_sparse_feature_from_feature(feature_key, features)
      max_sequence_length = tpu_embedding_.feature_to_config_dict[
          feature_key].max_sequence_length
      combiner = tpu_embedding_._table_to_config_dict[
          tpu_embedding_._feature_to_config_dict[
              feature_key].table_id].combiner
      if max_sequence_length > 0:
        length_feature_name = (
            tpu_fc.get_sequence_length_feature_key_name_from_feature_key_name(
                feature_key))
        length_feature = tf.math.minimum(
            fc_utils.sequence_length_from_sparse_tensor(sparse_feature),
            max_sequence_length)
        length_feature.set_shape(ctx.batch_size_for_input_fn)
        features[length_feature_name] = length_feature
      weight_key = tpu_embedding_.feature_to_config_dict[
          feature_key].weight_key
      sparse_feature_split = _split_tensor(sparse_feature,
                                           num_cores_per_batch)
      if combiner is None and not isinstance(sparse_feature,
                                             tf.sparse.SparseTensor):
        # A dense tensor with no combiner was provided so we assume that each
        # of the embedding_indices belongs to a different sample (setting
        # sample_indices to None).
        if weight_key is not None:
          raise ValueError(
              'Found weights {} for weighted_categorical_column, which is '
              'not compatible with sparse feature {} enqueued as dense '
              'tensor.'.format(weight_key, feature_key))
        enqueue_data = []
        for i in range(num_cores_per_batch):
          enqueue_data.append(
              tpu_embedding.EnqueueData(sparse_feature_split[i]))
      else:
        weights = None
        if isinstance(sparse_feature, tf.sparse.SparseTensor):
          weights = _get_weights_from_features(weight_key, features)
        weights_split = _split_tensor(weights, num_cores_per_batch)
        enqueue_data = []
        for i in range(num_cores_per_batch):
          split_weights = weights_split[i] if weights is not None else None
          enqueue_data.append(
              tpu_embedding.EnqueueData.from_sparse_tensor(
                  _maybe_dense_to_sparse(sparse_feature_split[i]),
                  weights=split_weights))
      enqueue_datas[feature_key] = enqueue_data

  # Transpose the enqueue_datas dict into a list of dicts.
  enqueue_datas_list = []
  for i in range(num_cores_per_batch):
    enqueue_data = {}
    for key, value in enqueue_datas.items():
      enqueue_data[key] = value[i]
    enqueue_datas_list.append(enqueue_data)
  return features, labels, enqueue_datas_list
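# The split_inputs variants rely on a private helper `_split_tensor` that is
# not shown. A plausible stand-in (an assumption, not the TF source): split a
# dense tensor with tf.split and a SparseTensor with tf.sparse.split along the
# batch dimension, passing None through unchanged so weights stay optional.
import tensorflow as tf

def _split_tensor(tensor, num_splits):
  if tensor is None:
    return [None] * num_splits
  if isinstance(tensor, tf.sparse.SparseTensor):
    return tf.sparse.split(sp_input=tensor, num_split=num_splits, axis=0)
  return tf.split(tensor, num_splits, axis=0)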
def _get_sequence_dense_tensor(
    self, inputs, weight_collections=None, trainable=None):
  # Do nothing with weight_collections and trainable since no variables are
  # created in this function.
  del weight_collections
  del trainable
  if not isinstance(
      self.categorical_column,
      (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)):  # pylint: disable=protected-access
    raise ValueError(
        'In indicator_column: {}. '
        'categorical_column must be of type _SequenceCategoricalColumn '
        'to use SequenceFeatures. '
        'Suggested fix: Use one of sequence_categorical_column_with_*. '
        'Given (type {}): {}'.format(self.name,
                                     type(self.categorical_column),
                                     self.categorical_column))
  # Feature has been already transformed. Return the intermediate
  # representation created by _transform_feature.
  dense_tensor = inputs.get(self)
  sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
  sequence_length = fc_utils.sequence_length_from_sparse_tensor(
      sparse_tensors.id_tensor)
  return SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=dense_tensor, sequence_length=sequence_length)
import tensorflow.compat.v1 as tf
from tensorflow.python.feature_column.utils import (
    sequence_length_from_sparse_tensor)

def sparse_tensor_len():
  # sequence_length_from_sparse_tensor returns the length of each document.
  # The sparse tensor holds batch_size documents; because of the sparse
  # format each document can have a different length, and the sparse
  # tensor's dense shape is the length of the longest document.
  # This op computes the length of each document.
  with tf.Graph().as_default():
    sess = tf.Session()
    sp_tensor = create_sparse_tensor()
    seq_len = sequence_length_from_sparse_tensor(sp_tensor)
    print(sess.run(seq_len))
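# `create_sparse_tensor` is not defined in the snippet above. A minimal
# hypothetical stand-in: two "documents" of lengths 3 and 1 stored in a
# [2, 4] SparseTensor, so the demo would be expected to print [3 1].
def create_sparse_tensor():
  return tf.SparseTensor(
      indices=[[0, 0], [0, 1], [0, 2], [1, 0]],
      values=['a', 'b', 'c', 'd'],
      dense_shape=[2, 4])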
def tfhub_embedding(x, embedding_layer, embedding_size):
  sp_tensor = tf.compat.v1.string_split(x, sep=' ')
  seq_length = sequence_length_from_sparse_tensor(sp_tensor)
  dense_tensor = tf.sparse.to_dense(sp_tensor)
  batch_size = tf.shape(dense_tensor)[0]
  flatten_embedding = embedding_layer(tf.reshape(dense_tensor, (-1,)))
  seq_embedding = tf.reshape(flatten_embedding,
                             (batch_size, -1, embedding_size))
  return seq_embedding, seq_length
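# A hedged usage sketch for the function above. The module URL and embedding
# dimension are assumptions; any TF-Hub text embedding that maps a 1-D batch
# of token strings to [N, embedding_size] would do.
import tensorflow as tf
import tensorflow_hub as hub

embedding_layer = hub.KerasLayer(
    'https://tfhub.dev/google/nnlm-en-dim50/2')  # assumed module, dim 50
sentences = tf.constant(['the quick brown fox', 'hello world'])
seq_embedding, seq_length = tfhub_embedding(sentences, embedding_layer, 50)
# seq_embedding should have shape [2, max_tokens, 50]; seq_length: [4 2]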
def split_inputs(ctx, features, labels, num_cores_per_batch=1):
  """Splits the dense and sparse tensors inside the features and labels."""
  enqueue_datas = collections.OrderedDict()
  if ctx.embedding_config:
    tpu_embedding_ = ctx.embedding_config.tpu_embedding
    for feature_key in tpu_embedding_.feature_to_config_dict:
      sparse_feature = _get_sparse_feature_from_feature(feature_key, features)
      max_sequence_length = tpu_embedding_.feature_to_config_dict[
          feature_key].max_sequence_length
      if max_sequence_length > 0:
        length_feature_name = (
            tpu_fc.get_sequence_length_feature_key_name_from_feature_key_name(
                feature_key))
        length_feature = math_ops.minimum(
            fc_utils.sequence_length_from_sparse_tensor(sparse_feature),
            max_sequence_length)
        length_feature.set_shape(ctx.batch_size_for_input_fn)
        features[length_feature_name] = length_feature
      weight_key = tpu_embedding_.feature_to_config_dict[
          feature_key].weight_key
      sparse_feature_split = _split_tensor(sparse_feature,
                                           num_cores_per_batch)
      if isinstance(sparse_feature, sparse_tensor.SparseTensor):
        weights = _get_weights_from_features(weight_key, features)
        weights_split = _split_tensor(weights, num_cores_per_batch)
        enqueue_data = []
        for i in range(num_cores_per_batch):
          enqueue_data.append(
              tpu_embedding.EnqueueData.from_sparse_tensor(
                  sparse_feature_split[i], weights_split[i]))
      else:
        if weight_key is not None:
          raise ValueError(
              'Found weights {} for weighted_categorical_column, which is '
              'not compatible with sparse feature {} enqueued as dense '
              'tensor.'.format(weight_key, feature_key))
        enqueue_data = []
        for i in range(num_cores_per_batch):
          enqueue_data.append(
              tpu_embedding.EnqueueData(sparse_feature_split[i]))
      enqueue_datas[feature_key] = enqueue_data

  # Transpose the enqueue_datas dict into a list of dicts.
  enqueue_datas_list = []
  for i in range(num_cores_per_batch):
    enqueue_data = {}
    for key, value in enqueue_datas.items():
      enqueue_data[key] = value[i]
    enqueue_datas_list.append(enqueue_data)
  return features, labels, enqueue_datas_list
def get_sequence_dense_tensor(self, transformation_cache, state_manager):
  """See `SequenceDenseColumn` base class."""
  if not isinstance(self.categorical_column, SequenceCategoricalColumn):
    raise ValueError(
        'In indicator_column: {}. categorical_column must be of type '
        'SequenceCategoricalColumn to use SequenceFeatures. '
        'Suggested fix: Use one of sequence_categorical_column_with_*. '
        'Given (type {}): {}'.format(self.name,
                                     type(self.categorical_column),
                                     self.categorical_column))
  # Feature has been already transformed. Return the intermediate
  # representation created by transform_feature.
  dense_tensor = transformation_cache.get(self, state_manager)
  sparse_tensors = self.categorical_column.get_sparse_tensors(
      transformation_cache, state_manager)
  sequence_length = fc_utils.sequence_length_from_sparse_tensor(
      sparse_tensors.id_tensor)
  return SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=dense_tensor, sequence_length=sequence_length)
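# A hedged end-to-end sketch of how this method gets exercised through the
# public feature-column API (TF 2.x, where SequenceFeatures still lives under
# tf.keras.experimental); the feature name 'tokens' and bucket count are
# illustrative.
import tensorflow as tf

tokens = tf.feature_column.sequence_categorical_column_with_identity(
    'tokens', num_buckets=10)
indicator = tf.feature_column.indicator_column(tokens)
sequence_features = tf.keras.experimental.SequenceFeatures([indicator])

# tf.sparse.from_dense drops zeros, so the trailing 0 acts as padding here.
features = {
    'tokens': tf.sparse.from_dense(tf.constant([[1, 2, 3], [4, 5, 0]]))
}
sequence_input, sequence_length = sequence_features(features)
# sequence_input: [2, T, 10] one-hot per timestep; sequence_length: [3 2]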
def split_inputs(ctx, features, labels):
  """Splits the dense and sparse tensors inside the features and labels."""
  enqueue_datas = collections.OrderedDict()
  if ctx.embedding_config:
    tpu_embedding_ = ctx.embedding_config.tpu_embedding
    feature_to_weight_key_name_dict = (
        ctx.embedding_config.feature_to_weight_key_name_dict)
    for feature_key in tpu_embedding_.feature_to_config_dict:
      sparse_feature = _get_sparse_feature_from_feature(feature_key, features)
      max_sequence_length = tpu_embedding_.feature_to_config_dict[
          feature_key].max_sequence_length
      if max_sequence_length > 0:
        length_feature_name = (
            tpu_fc.get_sequence_length_feature_key_name_from_feature_key_name(
                feature_key))
        length_feature = math_ops.minimum(
            fc_utils.sequence_length_from_sparse_tensor(sparse_feature),
            max_sequence_length)
        length_feature.set_shape(ctx.batch_size_for_input_fn)
        features[length_feature_name] = length_feature
      weight_key_name = feature_to_weight_key_name_dict[feature_key]
      if isinstance(sparse_feature, sparse_tensor.SparseTensor):
        weights = _get_weights_from_features(weight_key_name, features)
        enqueue_data = tpu_embedding.EnqueueData.from_sparse_tensor(
            sparse_feature, weights)
      else:
        if weight_key_name is not None:
          raise ValueError(
              'Found weights {} for weighted_categorical_column, which is '
              'not compatible with sparse feature {} enqueued as dense '
              'tensor.'.format(weight_key_name, feature_key))
        enqueue_data = tpu_embedding.EnqueueData(sparse_feature)
      enqueue_datas[feature_key] = enqueue_data
  return features, labels, enqueue_datas
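# `_get_weights_from_features` is another private helper the split_inputs
# variants call but do not show. A plausible stand-in (an assumption, not the
# TF source): pop the weight tensor out of the features dict when a weight
# key is configured, otherwise return None.
def _get_weights_from_features(weight_key_name, features):
  if weight_key_name is None:
    return None
  if weight_key_name not in features:
    raise ValueError(
        'Weight key {} not found in features.'.format(weight_key_name))
  return features.pop(weight_key_name)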