Example #1
    def call(self, texts: tf.Tensor,
             substrings: tf.RaggedTensor) -> tf.RaggedTensor:
        # Remove special characters from the texts, then collapse repeated whitespace
        texts = tf.strings.regex_replace(texts,
                                         pattern=self.special_chars,
                                         rewrite='')
        texts = tf.strings.strip(
            tf.strings.regex_replace(texts, pattern=r'\s{2,}', rewrite=' '))

        # Apply the same cleaning to the substrings
        substrings = tf.strings.regex_replace(substrings,
                                              pattern=self.special_chars,
                                              rewrite='')
        substrings = tf.strings.strip(
            tf.strings.regex_replace(substrings,
                                     pattern=r'\s{2,}',
                                     rewrite=' '))

        # Patterns requiring the substring to appear as a whole,
        # whitespace-delimited token within the text
        pre = r'.*(\s|^)'
        post = r'(\s|$).*'

        # Repeat each text once per substring so both ragged tensors share row structure
        ragged_texts = tf.RaggedTensor.from_row_lengths(
            values=tf.repeat(texts, repeats=substrings.row_lengths()),
            row_lengths=substrings.row_lengths())
        return tf.ragged.map_flat_values(
            self.find_match, ragged_texts,
            tf.strings.join([pre, substrings, post]))
Example #2
File: eges.py  Project: ritchietop/models
def embedding_pooling(tensor: tf.RaggedTensor, combiner: str = "sqrtn"):
    # Pool ragged embeddings along axis 1 using a sum / mean / sqrtn combiner
    tensor_sum = tf.math.reduce_sum(tensor, axis=1)
    if combiner == "sum":
        return tensor_sum
    # Clamp row lengths to at least 1 so empty rows do not cause division by zero
    row_lengths = tf.expand_dims(tensor.row_lengths(axis=1), axis=1)
    row_lengths = tf.math.maximum(tf.ones_like(row_lengths), row_lengths)
    row_lengths = tf.cast(row_lengths, dtype=tf.float32)
    if combiner == "mean":
        return tensor_sum / row_lengths
    if combiner == "sqrtn":
        return tensor_sum / tf.math.sqrt(row_lengths)
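
A minimal usage sketch (not part of the original project) showing how embedding_pooling behaves on a small ragged batch; the values below are made up and assume each row holds 2-dimensional embeddings:

import tensorflow as tf

embeddings = tf.ragged.constant(
    [[[1.0, 2.0], [3.0, 4.0]],  # row with two embeddings
     [[5.0, 6.0]]],             # row with one embedding
    ragged_rank=1)
pooled_sum = embedding_pooling(embeddings, combiner="sum")      # [[4., 6.], [5., 6.]]
pooled_mean = embedding_pooling(embeddings, combiner="mean")    # [[2., 3.], [5., 6.]]
pooled_sqrtn = embedding_pooling(embeddings, combiner="sqrtn")  # row sums scaled by 1/sqrt(row length)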
Example #3
    def _run_model_filter_empty_sequences(self,
                                          batch_item_indices: tf.RaggedTensor,
                                          batch_customer_indices, n_results):

        # Check if there are empty sequences
        sequence_lengths = batch_item_indices.row_lengths()
        non_empty_seq_count = tf.math.count_nonzero(sequence_lengths)
        n_sequences = tf.shape(sequence_lengths, tf.int64)[0]

        if non_empty_seq_count == 0:
            # All sequences are empty
            label_predictions = tf.zeros([n_sequences, n_results],
                                         dtype=tf.string)
            probs_predictions = tf.zeros([n_sequences, n_results],
                                         dtype=tf.float32)
            return (label_predictions, probs_predictions)

        elif non_empty_seq_count >= n_sequences:
            # There are no empty sequences. Run the model with the full batch
            return self._run_model_and_postprocess(batch_item_indices,
                                                   batch_customer_indices,
                                                   n_results)
        else:
            # There are some empty sequences
            # The model fails if a sequence is empty, and that seems to be the
            # expected behaviour: do not feed empty sequences.
            # Build a mask of the non-empty sequences
            non_empty_mask = tf.math.greater(sequence_lengths, 0)

            # Get non empty sequences
            non_empty_sequences: tf.RaggedTensor = tf.ragged.boolean_mask(
                batch_item_indices, non_empty_mask)
            non_empty_customers = tf.boolean_mask(batch_customer_indices,
                                                  non_empty_mask)

            # Run model
            label_predictions, probs_predictions = self._run_model_and_postprocess(
                non_empty_sequences, non_empty_customers, n_results)

            # Merge real predictions with empty predictions for empty sequences:
            indices = tf.where(non_empty_mask)
            final_shape = [n_sequences, n_results]
            label_predictions = tf.scatter_nd(indices, label_predictions,
                                              final_shape)
            probs_predictions = tf.scatter_nd(indices, probs_predictions,
                                              final_shape)
            return (label_predictions, probs_predictions)
Example #4
def pad_sequence_left(sequences_batch: tf.RaggedTensor, mask: bool):
    """ Pad sequences with zeros on left side """

    # Truncate rows to keep at most the last `settings.settings.sequence_length` items
    sequences_batch = sequences_batch[:, -settings.settings.sequence_length:]

    if mask:
        # Add one to indices, to reserve 0 index for padding
        sequences_batch += 1

    # Compute how many padding elements each row needs, build a flat vector of
    # zeros of that total size, and prepend it as a ragged tensor
    pad_row_lengths = settings.settings.sequence_length - sequences_batch.row_lengths()
    pad_values = tf.zeros(
        [(settings.settings.sequence_length * sequences_batch.nrows()) - tf.size(sequences_batch, tf.int64)],
        sequences_batch.dtype)
    padding = tf.RaggedTensor.from_row_lengths(pad_values, pad_row_lengths)
    return tf.concat([padding, sequences_batch], axis=1).to_tensor()
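
A hypothetical call of pad_sequence_left (not from the original project), assuming the project's settings.settings.sequence_length equals 4; the item indices below are made up:

import tensorflow as tf

item_indices = tf.ragged.constant([[7, 3], [5, 1, 2, 9, 4], []], dtype=tf.int64)
padded = pad_sequence_left(item_indices, mask=True)
# With sequence_length == 4 this returns a dense (3, 4) tensor:
# [[0, 0, 8, 4], [2, 3, 10, 5], [0, 0, 0, 0]]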
Example #5
File: din.py  Project: ritchietop/models
def multi_behavior_embedding_pooling(tensor: tf.RaggedTensor,
                                     combiner: str = "sqrtn"):
    # Pool per-behavior ragged embeddings along axis 2 using a sum / mean / sqrtn combiner
    if len(tensor.shape) == 3:
        tensor = tf.expand_dims(
            tensor, axis=2)  # batch_size * behavior_count * 1 * embedding_size
    tensor_sum = tf.math.reduce_sum(
        tensor, axis=2)  # batch_size * behavior_count * embedding_size
    if combiner == "sum":
        return tensor_sum
    row_lengths = tf.expand_dims(tensor.row_lengths(axis=2), axis=2)
    row_lengths = tf.math.maximum(tf.ones_like(row_lengths), row_lengths)
    row_lengths = tf.cast(row_lengths, dtype=tf.float32)
    if combiner == "mean":
        return tensor_sum / row_lengths
    if combiner == "sqrtn":
        return tensor_sum / tf.math.sqrt(row_lengths)