def Encode(self, text): """Converts string `text` to integer ids and the encoded string. Encoding includes prefixing the beginning-of-word token to each word. Returns: ids: the encoded integer ids. tokens: the encoded string. """ words = tf.sparse.to_dense(tf.strings.split([text]), default_value='')[0] num_words = tf.size(words) ids_ta = tf.TensorArray(tf.int32, 0, dynamic_size=True) def _WordsToIds(i, words, ids_ta): encoded_ids = self._EncodeToIds(BOW_STR + words[i]) ids_ta = ids_ta.scatter( tf.range(ids_ta.size(), ids_ta.size() + tf.size(encoded_ids)), encoded_ids) return i + 1, words, ids_ta _, _, ids_ta = tf.while_loop(lambda i, *_: i < num_words, _WordsToIds, loop_vars=(tf.constant(0, tf.int32), words, ids_ta), parallel_iterations=30, back_prop=False) ids = ids_ta.stack() return ids, self._TokenToString(ids)
def _MergeCandidates(tokens, candidates): """Merge in the reverse binary tree.""" best_id = tf.argmin(candidates, output_type=tf.int32) # Perform the merge at position best_id. tokens = tf.concat([ tokens[:best_id], [candidates[best_id]], tokens[best_id + 2:] ], axis=0) # Recompute the merge candidates. # Only the neighbors of best_id need to be recomputed. empty = tf.zeros([0], dtype=candidates.dtype) def _MergeLeft(): return tf.concat([ candidates[:best_id - 1], _MergeOneToken(tokens, best_id - 1) ], axis=0) left_candidates = tf.cond(tf.equal(best_id, 0), lambda: empty, _MergeLeft) def _MergeRight(): return tf.concat([ _MergeOneToken(tokens, best_id), candidates[best_id + 2:] ], axis=0) right_candidates = tf.cond( tf.greater_equal(best_id, tf.size(tokens) - 1), lambda: empty, _MergeRight) candidates = tf.concat([left_candidates, right_candidates], axis=0) return tokens, candidates
def _update_mask(self, weights, threshold): """Updates the mask for a given weight tensor. This functions first computes the cdf of the weight tensor, and estimates the threshold value such that 'desired_sparsity' fraction of weights have magnitude less than the threshold. Args: weights: The weight tensor that needs to be masked. threshold: The current threshold value. The function will compute a new threshold and return the exponential moving average using the current value of threshold Returns: new_threshold: The new value of the threshold based on weights, and sparsity at the current global_step new_mask: A numpy array of the same size and shape as weights containing 0 or 1 to indicate which of the values in weights falls below the threshold Raises: ValueError: if sparsity is not defined """ if self._sparsity is None: raise ValueError('Sparsity variable undefined') sparsity = self._get_sparsity(weights.op.name) with tf.name_scope(weights.op.name + '_pruning_ops'): abs_weights = tf.abs(weights) k = tf.cast( tf.round( tf.cast(tf.size(abs_weights), tf.float32) * (1 - sparsity)), tf.int32) # Sort the entire array values, _ = tf.nn.top_k(tf.reshape(abs_weights, [-1]), k=tf.size(abs_weights)) # Grab the (k-1) th value current_threshold = tf.gather(values, k - 1) smoothed_threshold = tf.add_n([ tf.multiply(current_threshold, 1 - self._spec.threshold_decay), tf.multiply(threshold, self._spec.threshold_decay) ]) new_mask = tf.cast( tf.greater_equal(abs_weights, smoothed_threshold), tf.float32) return smoothed_threshold, new_mask
def _WordsToIds(i, words, ids_ta): encoded_ids = self._EncodeToIds(BOW_STR + words[i]) ids_ta = ids_ta.scatter( tf.range(ids_ta.size(), ids_ta.size() + tf.size(encoded_ids)), encoded_ids) return i + 1, words, ids_ta
def ReadInput(line): word_count = tf.size(tf.strings.split([line])) strlen = tf.size(tf.strings.split([line], '')) return [line, word_count], strlen