Beispiel #1
0
    def Encode(self, text):
        """Encodes `text` word by word into integer ids and their string form.

        `text` is wrapped in a one-element list and split with
        `tf.strings.split` using its default separator. Every resulting word
        is prefixed with `BOW_STR` (the beginning-of-word token) and passed to
        `self._EncodeToIds`; the ids of each word are appended, in word order,
        to a single growing `tf.TensorArray`.

        Args:
          text: the string to encode. Presumably a scalar string tensor or a
            Python string -- confirm against callers.

        Returns:
          ids: the encoded integer ids (int32), stacked from the TensorArray.
          tokens: the encoded string, i.e. `self._TokenToString(ids)`.
        """
        # Row 0 of the densified split result holds the words of the single
        # input string; '' is the fill value used by the dense conversion.
        split_words = tf.sparse.to_dense(tf.strings.split([text]),
                                         default_value='')[0]
        word_count = tf.size(split_words)
        empty_ta = tf.TensorArray(tf.int32, 0, dynamic_size=True)

        def _HasMoreWords(i, *_):
            # Loop condition: keep going until every word was visited.
            return i < word_count

        def _WordsToIds(i, words, ids_ta):
            word_ids = self._EncodeToIds(BOW_STR + words[i])
            # Append: the new ids go to the slots right after the current end.
            begin = ids_ta.size()
            slots = tf.range(begin, begin + tf.size(word_ids))
            return i + 1, words, ids_ta.scatter(slots, word_ids)

        final_state = tf.while_loop(_HasMoreWords,
                                    _WordsToIds,
                                    loop_vars=(tf.constant(0, tf.int32),
                                               split_words, empty_ta),
                                    parallel_iterations=30,
                                    back_prop=False)

        # The TensorArray is the last loop variable.
        ids = final_state[-1].stack()
        return ids, self._TokenToString(ids)
Beispiel #2
0
        def _MergeCandidates(tokens, candidates):
            """Applies the minimum-valued candidate merge, refreshing neighbors.

            With `p` the argmin position of `candidates`, `candidates[p]`
            replaces the two entries `tokens[p]` and `tokens[p + 1]`. Only the
            candidates adjacent to `p` are recomputed, by calling
            `_MergeOneToken` on the updated tokens; all other candidates are
            copied over unchanged.

            Args:
              tokens: 1-D tensor holding the current token sequence.
              candidates: 1-D tensor of merge candidates; judging by the
                slicing below, entry `p` stands for merging `tokens[p]` with
                `tokens[p + 1]`.

            Returns:
              The updated `(tokens, candidates)` pair.
            """
            merge_pos = tf.argmin(candidates, output_type=tf.int32)

            # Replace the pair starting at merge_pos by the chosen candidate.
            head = tokens[:merge_pos]
            merged = [candidates[merge_pos]]
            tail = tokens[merge_pos + 2:]
            tokens = tf.concat([head, merged, tail], axis=0)

            # Used for a side that has no neighbor left to recompute.
            no_candidates = tf.zeros([0], dtype=candidates.dtype)

            def _LeftCandidates():
                # Untouched candidates, then the refreshed one just left of
                # the merge position.
                kept = candidates[:merge_pos - 1]
                refreshed = _MergeOneToken(tokens, merge_pos - 1)
                return tf.concat([kept, refreshed], axis=0)

            def _RightCandidates():
                # The refreshed candidate at the merge position, then the
                # untouched ones after the consumed pair.
                refreshed = _MergeOneToken(tokens, merge_pos)
                kept = candidates[merge_pos + 2:]
                return tf.concat([refreshed, kept], axis=0)

            # A merge at the very front has nothing on its left.
            merged_at_front = tf.equal(merge_pos, 0)
            left_part = tf.cond(merged_at_front, lambda: no_candidates,
                                _LeftCandidates)

            # A merge that produced the last token has nothing on its right.
            merged_at_back = tf.greater_equal(merge_pos, tf.size(tokens) - 1)
            right_part = tf.cond(merged_at_back, lambda: no_candidates,
                                 _RightCandidates)

            return tokens, tf.concat([left_part, right_part], axis=0)
Beispiel #3
0
    def _update_mask(self, weights, threshold):
        """Updates the mask for a given weight tensor.

        The absolute weights are sorted in descending order and the k-th
        largest magnitude, with k = round(size * (1 - sparsity)), is taken as
        the current threshold. That value is blended with the incoming
        `threshold` using `self._spec.threshold_decay`, and the mask keeps the
        weights whose magnitude is at least the blended threshold.

        Args:
          weights: The weight tensor that needs to be masked.
          threshold: The current threshold value. The function will compute a
            new threshold and return the exponential moving average using the
            current value of threshold

        Returns:
          new_threshold: The new value of the threshold based on weights, and
            sparsity at the current global_step
          new_mask: A float32 tensor of the same shape as weights containing
            1 where the weight magnitude is greater than or equal to the new
            threshold and 0 elsewhere

        Raises:
          ValueError: if sparsity is not defined
        """
        if self._sparsity is None:
            raise ValueError('Sparsity variable undefined')

        sparsity = self._get_sparsity(weights.op.name)
        with tf.name_scope(weights.op.name + '_pruning_ops'):
            abs_weights = tf.abs(weights)
            num_weights = tf.size(abs_weights)
            k = tf.cast(
                tf.round(tf.cast(num_weights, tf.float32) * (1 - sparsity)),
                tf.int32)
            # Keep at least one weight. Without the clamp a sparsity close to
            # 1 (or a tiny tensor) rounds k to 0 and the gather below would
            # use index -1, which is out of range: an error on CPU and a
            # silent 0 threshold (i.e. nothing pruned) on GPU.
            k = tf.maximum(k, 1)
            # Sort the entire array
            values, _ = tf.nn.top_k(tf.reshape(abs_weights, [-1]),
                                    k=num_weights)
            # Grab the (k-1) th value
            current_threshold = tf.gather(values, k - 1)
            # Exponential moving average of the old and the new threshold.
            smoothed_threshold = tf.add_n([
                tf.multiply(current_threshold, 1 - self._spec.threshold_decay),
                tf.multiply(threshold, self._spec.threshold_decay)
            ])

            new_mask = tf.cast(
                tf.greater_equal(abs_weights, smoothed_threshold), tf.float32)

        return smoothed_threshold, new_mask
Beispiel #4
0
 def _WordsToIds(i, words, ids_ta):
     """Encodes word `i` and appends its ids to the end of `ids_ta`.

     Shaped like a `tf.while_loop` body: it takes the loop variables and
     returns their updated values. `self` and `BOW_STR` come from the
     enclosing scope.

     Args:
       i: index of the word to encode.
       words: the words being encoded; passed through unchanged.
       ids_ta: TensorArray collecting the ids of all words seen so far.

     Returns:
       `(i + 1, words, ids_ta)` with the ids of word `i` appended.
     """
     word_ids = self._EncodeToIds(BOW_STR + words[i])
     # Append: the new ids go to the slots right after the current end.
     begin = ids_ta.size()
     slots = tf.range(begin, begin + tf.size(word_ids))
     return i + 1, words, ids_ta.scatter(slots, word_ids)
Beispiel #5
0
 def ReadInput(line):
     """Returns `[line, word_count]` together with a length value for `line`.

     Both counts are the `tf.size` of a `tf.strings.split` result over the
     one-element batch `[line]`: the first with the default separator, the
     second with an empty-string separator.

     NOTE(review): the name `strlen` suggests a per-character split, but what
     an empty separator yields depends on the `tf.strings.split` version in
     use -- confirm.

     Args:
       line: the input string.

     Returns:
       A pair `([line, word_count], strlen)`.
     """
     batch = [line]
     num_words = tf.size(tf.strings.split(batch))
     length = tf.size(tf.strings.split(batch, ''))
     return [line, num_words], length