def _process(record):
  num, = tf.py_func(str_to_num, [record], [tf.float32])
  num = tf.stack([num, tf.square(num)])
  if use_nested_map:
    return py_utils.NestedMap(record=record, num=num), 1
  else:
    return [record, num], 1
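# A minimal sketch of exercising the _process callback above outside an input
# pipeline, assuming TF1 graph mode. str_to_num and use_nested_map are free
# variables in the snippet; the definitions below are illustrative stand-ins,
# not the original helpers.
import numpy as np
import tensorflow as tf


def str_to_num(s):
  # Parse a UTF-8 byte string such as b'3.5' into a float32 scalar.
  return np.float32(float(s.decode('utf-8')))


use_nested_map = False

record = tf.constant('3.5')
outputs, bucket_key = _process(record)
with tf.Session() as sess:
  rec, num = sess.run(outputs)
  # rec == b'3.5'; num == [3.5, 12.25] (the parsed value and its square).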
def FinalizeImage(self):
  """Finishes creation of the overall figure, returning the image tensor."""
  subplot_grid_shape = self._subplot_grid_shape
  if subplot_grid_shape is None:
    subplot_grid_shape = (len(self._subplots), 1)

  # AddMatplotlibFigureSummary (due to restrictions of py_func) only supports
  # a flattened list of tensors, so we must do some bookkeeping to maintain a
  # mapping from each _SubplotMetadata object to its slice of
  # flattened_tensors.
  subplot_slices = []
  flattened_tensors = []
  for subplot in self._subplots:
    start = len(flattened_tensors)
    subplot_slices.append((start, start + len(subplot.tensor_list)))
    flattened_tensors.extend(subplot.tensor_list)

  def PlotFunc(fig, *numpy_data_list):
    gs = gridspec.GridSpec(*subplot_grid_shape, **self._gridspec_kwargs)
    for n, subplot in enumerate(self._subplots):
      axes = fig.add_subplot(gs[n])
      start, end = subplot_slices[n]
      subplot_data = numpy_data_list[start:end]
      subplot.plot_func(fig, axes, *subplot_data)

  func = functools.partial(_RenderMatplotlibFigures, self._figsize,
                           self._max_outputs, PlotFunc)
  batch_sizes = [tf.shape(t)[0] for t in flattened_tensors]
  num_tensors = len(flattened_tensors)
  with tf.control_dependencies([
      tf.assert_equal(
          batch_sizes, [batch_sizes[0]] * num_tensors, summarize=num_tensors)
  ]):
    return tf.py_func(
        func, flattened_tensors, tf.uint8, name='RenderMatplotlibFigures')
def _process(source_id, record):
  num, = tf.py_func(str_to_num, [record], [tf.float32])
  num = tf.stack([num, tf.square(num)])
  if use_nested_map:
    return py_utils.NestedMap(
        source_id=source_id, record=record, num=num), bucket_fn(num)
  else:
    return [source_id, record, num], bucket_fn(num)
def _ProcessLine(self, line):
  """A single-text-line processor.

  Gets a string tensor representing a line of text that has been read from
  the input file, and splits it into graphemes (characters). We use the
  original characters as the target labels, and the lowercased and
  punctuation-removed characters as the source labels.

  Args:
    line: a 1D string tensor.

  Returns:
    A list of tensors, in the order expected by __init__.
  """
  # Tokenize the input into integer ids.
  # tgt_ids has the start-of-sentence token prepended, and tgt_labels has the
  # end-of-sentence token appended.
  tgt_ids, tgt_labels, tgt_paddings = self.StringsToIds(
      tf.convert_to_tensor([line]))

  def Normalize(line):
    # Lowercase and remove punctuation.
    line = line.lower().translate(None, string.punctuation.encode('utf-8'))
    # Convert multiple consecutive spaces to a single one.
    line = b' '.join(line.split())
    return line

  normalized_line = tf.py_func(Normalize, [line], tf.string, stateful=False)
  _, src_labels, src_paddings = self.StringsToIds(
      tf.convert_to_tensor([normalized_line]), is_source=True)
  # The model expects the source without a start-of-sentence token.
  src_ids = src_labels

  # Compute the length for bucketing.
  bucket_key = tf.cast(
      tf.round(
          tf.maximum(
              tf.reduce_sum(1.0 - src_paddings),
              tf.reduce_sum(1.0 - tgt_paddings))), tf.int32)
  tgt_weights = 1.0 - tgt_paddings

  # Return tensors in an order consistent with __init__.
  out_tensors = [
      src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels, tgt_weights
  ]
  return [tf.squeeze(t, axis=0) for t in out_tensors], bucket_key
def bleu_score(predictions, labels, **unused_kwargs):
  """BLEU score computation between labels and predictions.

  An approximate BLEU scoring method, since we do not glue word pieces or
  decode the ids and tokenize the output. By default, we use an n-gram order
  of 4 and apply the brevity penalty. Also, this does not have beam search.

  Args:
    predictions: tensor, model predictions.
    labels: tensor, gold output.

  Returns:
    bleu: a float32 scalar, the approximate BLEU score.
  """
  outputs = tf.to_int32(tf.argmax(predictions, axis=-1))
  # Convert the outputs and labels to a [batch_size, input_length] tensor.
  outputs = tf.squeeze(outputs, axis=[-1, -2])
  labels = tf.squeeze(labels, axis=[-1, -2])

  bleu = tf.py_func(compute_bleu, (labels, outputs), tf.float32)
  return bleu, tf.constant(1.0)
def _process(record):
  num = tf.py_func(pickle.loads, [record], tf.int32)
  bucket_key = tf.shape(num)[0]
  return [num, tf.transpose(num, [1, 0, 2])], bucket_key
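# A minimal sketch of driving the pickle-based _process callback above,
# assuming TF1 graph mode. The record is assumed to be a pickled int32 array
# of shape [time, batch, depth]; the bucket key becomes the leading (time)
# dimension.
import pickle
import numpy as np
import tensorflow as tf

record = tf.constant(
    pickle.dumps(np.arange(24, dtype=np.int32).reshape(2, 3, 4)))
tensors, bucket_key = _process(record)
with tf.Session() as sess:
  num, num_t = sess.run(tensors)
  # num.shape == (2, 3, 4); num_t.shape == (3, 2, 4);
  # bucket_key evaluates to 2.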
def _process(record):
  bucket_key = 1
  num, = tf.py_func(pickle.loads, [record], [tf.bool])
  return [num], bucket_key
def _ProcessLine(self, line):
  """A single-text-line processor.

  Gets a string tensor representing a line of text that has been read from
  the input file, and splits it into graphemes (characters). We use the
  original characters as the target labels, and the lowercased and
  punctuation-removed characters as the source labels.

  Args:
    line: a 1D string tensor.

  Returns:
    A NestedMap containing the processed example.
  """
  p = self.params
  # Tokenize the input into integer ids.
  # tgt_ids has the start-of-sentence token prepended, and tgt_labels has the
  # end-of-sentence token appended.
  tgt_ids, tgt_labels, tgt_paddings = self.tokenizer.StringsToIds(
      tf.convert_to_tensor([line]), p.target_max_length)
  # Because StringsToIds requires a vector but _ProcessLine is called for
  # individual lines, we need to manually remove the batch dimension.
  tgt_ids = tgt_ids[0]
  tgt_labels = tgt_labels[0]
  tgt_paddings = tgt_paddings[0]

  # This normalization function produces the "source" text from which the
  # Punctuator task is trained to reproduce the original "target" text.
  def Normalize(line):
    # Lowercase and remove punctuation.
    line = line.lower().translate(None, string.punctuation.encode('utf-8'))
    # Convert multiple consecutive spaces to a single one.
    line = b' '.join(line.split())
    return line

  normalized_line = tf.py_func(Normalize, [line], tf.string, stateful=False)
  _, src_labels, src_paddings = self.tokenizer.StringsToIds(
      tf.convert_to_tensor([normalized_line]), p.source_max_length)
  # Because StringsToIds requires a vector but _ProcessLine is called for
  # individual lines, we need to manually remove the batch dimension.
  src_labels = src_labels[0]
  src_paddings = src_paddings[0]
  # The model expects the source without a start-of-sentence token.
  src_ids = src_labels
  tgt_weights = 1.0 - tgt_paddings

  ret = py_utils.NestedMap()
  ret.src = py_utils.NestedMap()
  ret.src.ids = tf.cast(src_ids, dtype=tf.int32)
  ret.src.paddings = src_paddings
  ret.tgt = py_utils.NestedMap()
  ret.tgt.ids = tgt_ids
  ret.tgt.labels = tf.cast(tgt_labels, dtype=tf.int32)
  ret.tgt.weights = tgt_weights
  ret.tgt.paddings = tgt_paddings
  return ret
def Process(source_id, record):
  del source_id  # Unused.
  [num] = tf.py_func(int, [record], [tf.int64])
  return py_utils.NestedMap(data=num), 1
def Wrap(val):
  dtype = tf.as_dtype(val.dtype)
  assert dtype != tf.string  # tf.string is not supported by py_func.
  return tf.py_func(lambda: val, [], dtype)
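# A minimal usage sketch for Wrap, assuming TF1 graph mode. The numpy value is
# captured in a py_func closure, so it is fetched from Python on each run
# rather than being embedded in the GraphDef as a constant.
import numpy as np
import tensorflow as tf

val = np.array([1.0, 2.0, 3.0], dtype=np.float32)
wrapped = Wrap(val)
with tf.Session() as sess:
  print(sess.run(wrapped))  # => [1. 2. 3.]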
def MyFn():
  return tf.py_func(lambda: next(it), [], [tf.float32, tf.float32])
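# A minimal sketch of MyFn, assuming TF1 graph mode. `it` is a free variable
# in the snippet; here it is an illustrative iterator yielding float32 pairs.
# Each session run re-executes the py_func and pulls the next pair from the
# Python iterator.
import numpy as np
import tensorflow as tf

it = iter([(np.float32(i), np.float32(i * i)) for i in range(3)])
a, b = MyFn()
with tf.Session() as sess:
  print(sess.run([a, b]))  # => [0.0, 0.0]
  print(sess.run([a, b]))  # => [1.0, 1.0]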