def print_prediction(self, prediction, params=None, stream=None):
    """Prints the translation hypotheses of a single prediction.

    Args:
      prediction: A dict of model outputs; expected keys are ``log_probs`` and
        either ``tokens``/``length`` (token-level output) or ``text``
        (already detokenized output). ``alignment`` must be present when
        alignments are requested.
      params: Optional dict of printing options (``with_scores``,
        ``with_alignments``).
      stream: The output stream to write to.

    Raises:
      ValueError: if ``with_alignments`` is set but the prediction has no
        ``alignment`` entry.
    """
    if params is None:
        params = {}
    with_scores = params.get("with_scores")
    alignment_type = params.get("with_alignments")
    if alignment_type and "alignment" not in prediction:
        raise ValueError(
            "with_alignments is set but the model did not return alignment information"
        )
    num_hypotheses = len(prediction["log_probs"])
    for i in range(num_hypotheses):
        # Fix: target_length was previously unbound when the model returned
        # detokenized "text" and alignments were requested (NameError).
        # None here makes the alignment slice below a no-op (full matrix).
        target_length = None
        if "tokens" in prediction:
            target_length = prediction["length"][i]
            tokens = prediction["tokens"][i][:target_length]
            sentence = self.labels_inputter.tokenizer.detokenize(tokens)
        else:
            sentence = prediction["text"][i]
        score = None
        attention = None
        if with_scores:
            score = prediction["log_probs"][i]
        if alignment_type:
            attention = prediction["alignment"][i][:target_length]
        sentence = misc.format_translation_output(
            sentence,
            score=score,
            attention=attention,
            alignment_type=alignment_type,
        )
        misc.print_as_bytes(sentence, stream=stream)
def detokenize_stream(self, input_stream=sys.stdin, output_stream=sys.stdout, delimiter=" "):
    """Detokenizes a stream of sentences.

    Args:
      input_stream: The input stream.
      output_stream: The output stream.
      delimiter: The token delimiter used for text serialization.
    """
    for serialized in input_stream:
        # Split the serialized line back into tokens, then rebuild the text.
        detokenized = self.detokenize(serialized.strip().split(delimiter))
        misc.print_as_bytes(detokenized, stream=output_stream)
def tokenize_stream(self, input_stream=sys.stdin, output_stream=sys.stdout, delimiter=" "):
    """Tokenizes a stream of sentences.

    Args:
      input_stream: The input stream.
      output_stream: The output stream.
      delimiter: The token delimiter to use for text serialization.
    """
    for raw_line in input_stream:
        # Tokenize each line and serialize the tokens with the delimiter.
        pieces = self.tokenize(raw_line.strip())
        misc.print_as_bytes(delimiter.join(pieces), stream=output_stream)
def print_score(self, score, params=None, stream=None):
    """Prints a scoring result for a single sentence.

    Args:
      score: A dict with keys ``length``, ``tokens``, ``cross_entropy``,
        ``score``, and optionally ``attention``.
      params: Optional dict of printing options (``with_token_level``,
        ``with_alignments``).
      stream: The output stream to write to.
    """
    if params is None:
        params = {}
    num_tokens = score["length"]
    detokenized = self.decoder_inputter.tokenizer.detokenize(score["tokens"][:num_tokens])
    # Token-level cross entropy and attention are only emitted on request /
    # availability.
    token_level_scores = (
        score["cross_entropy"][:num_tokens] if params.get("with_token_level") else None
    )
    attention = score["attention"][:num_tokens] if "attention" in score else None
    formatted = misc.format_translation_output(
        detokenized,
        score=score["score"],
        token_level_scores=token_level_scores,
        attention=attention,
        alignment_type=params.get("with_alignments"),
    )
    misc.print_as_bytes(formatted, stream=stream)
def tokenize_stream(
    self,
    input_stream=sys.stdin,
    output_stream=sys.stdout,
    delimiter=" ",
    training=True,
):
    """Tokenizes a stream of sentences.

    Args:
      input_stream: The input stream.
      output_stream: The output stream.
      delimiter: The token delimiter to use for text serialization.
      training: Set to ``False`` to tokenize for inference.
    """
    for raw_line in input_stream:
        # Tokenize in the requested mode and serialize with the delimiter.
        pieces = self.tokenize(raw_line.strip(), training=training)
        misc.print_as_bytes(delimiter.join(pieces), stream=output_stream)
def _process_stream_as_dataset(
    input_stream,
    output_stream,
    map_func,
    batch_size=512,
    num_parallel_calls=4,
):
    """Applies ``map_func`` on batches of lines read from a stream.

    The input stream is wrapped in a ``tf.data`` pipeline so that ``map_func``
    can run on batches of strings in parallel; each resulting line is written
    to the output stream.

    Args:
      input_stream: An iterable of input lines.
      output_stream: The stream to write the mapped lines to.
      map_func: A function mapping a batch of strings to a batch of strings.
      batch_size: The number of lines to process at once.
      num_parallel_calls: The parallelism level of the map transformation.

    Raises:
      TypeError: if ``map_func`` does not produce a 1-D string tensor.
    """
    dataset = (
        tf.data.Dataset.from_generator(
            lambda: input_stream,
            output_types=tf.string,
            output_shapes=tf.TensorShape([]),
        )
        .batch(batch_size)
        .map(map_func, num_parallel_calls=num_parallel_calls)
    )
    expected_spec = tf.TensorSpec(shape=[None], dtype=tf.string)
    if dataset.element_spec != expected_spec:
        raise TypeError(
            "Expected map_func to produce elements with spec %s, but got spec %s instead"
            % (expected_spec, dataset.element_spec)
        )
    for batch in dataset.as_numpy_iterator():
        for element in batch:
            misc.print_as_bytes(element, stream=output_stream)
def print_prediction(self, prediction, params=None, stream=None):
    """Prints the predicted class of a single prediction.

    Args:
      prediction: A dict of model outputs with a ``classes`` entry.
      params: Unused printing options, kept for interface consistency.
      stream: The output stream to write to.
    """
    predicted_class = prediction["classes"]
    misc.print_as_bytes(predicted_class, stream=stream)
def print_prediction(self, prediction, params=None, stream=None):
    """Prints the detokenized output of a single prediction.

    Args:
      prediction: A dict of model outputs with ``tokens`` and ``length``
        entries.
      params: Unused printing options, kept for interface consistency.
      stream: The output stream to write to.
    """
    num_tokens = prediction["length"]
    # Drop padding before detokenizing.
    detokenized = self.examples_inputter.tokenizer.detokenize(
        prediction["tokens"][:num_tokens]
    )
    misc.print_as_bytes(misc.format_translation_output(detokenized), stream=stream)
def print_prediction(self, prediction, params=None, stream=None):
    """Prints the predicted tag sequence of a single prediction.

    Args:
      prediction: A dict of model outputs with ``tags`` (byte strings) and
        ``length`` entries.
      params: Unused printing options, kept for interface consistency.
      stream: The output stream to write to.
    """
    num_tags = prediction["length"]
    # Tags are byte strings; join them with a byte-space before printing.
    predicted_tags = prediction["tags"][:num_tags]
    misc.print_as_bytes(b" ".join(predicted_tags), stream=stream)