def testAddSequenceControls(
    self, ids, length, start_id, end_id, expected_ids, expected_length
):
    ids = tf.constant(ids, dtype=tf.int64)
    length = tf.constant(length, dtype=tf.int32)
    ids, length = inputters.add_sequence_controls(
        ids, length, start_id=start_id, end_id=end_id
    )
    self.assertAllEqual(self.evaluate(ids), expected_ids)
    self.assertAllEqual(self.evaluate(length), expected_length)
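# Illustrative parameterization for the test above (hypothetical values, not
# necessarily the repository's actual test data), assuming
# inputters.add_sequence_controls prepends start_id and appends end_id within
# each sequence's true length and pads the extended batch with zeros:
#
#     ids=[[1, 2, 3], [4, 5, 0]], length=[3, 2], start_id=1, end_id=2
#     expected_ids=[[1, 1, 2, 3, 2], [1, 4, 5, 2, 0]], expected_length=[5, 4]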
def analyze(self, features):
    # Encode the source.
    source_length = self.features_inputter.get_length(features)
    source_inputs = self.features_inputter(features)
    encoder_outputs, encoder_state, encoder_sequence_length = self.encoder(
        source_inputs, sequence_length=source_length
    )

    # Decode the predictions and squeeze out the single-hypothesis dimension.
    predictions = self._dynamic_decode(
        features, encoder_outputs, encoder_state, encoder_sequence_length
    )
    length = predictions["length"]
    length = tf.squeeze(length, axis=[1])
    tokens = predictions["tokens"]
    tokens = tf.squeeze(tokens, axis=[1])

    # Replace predicted </s> tokens with padding before the vocabulary lookup,
    # then re-add the control ids expected by the target inputter.
    tokens = tf.where(tf.equal(tokens, "</s>"), tf.fill(tf.shape(tokens), ""), tokens)
    ids = self.labels_inputter.tokens_to_ids.lookup(tokens)
    if self.labels_inputter.mark_start or self.labels_inputter.mark_end:
        ids, length = add_sequence_controls(
            ids,
            length,
            start_id=START_OF_SENTENCE_ID if self.labels_inputter.mark_start else None,
            end_id=END_OF_SENTENCE_ID if self.labels_inputter.mark_end else None,
        )

    # Build the shifted target labels and re-decode the prediction with teacher
    # forcing to recover the logits.
    labels = {
        "ids_out": ids[:, 1:],
        "ids": ids[:, :-1],
        "length": length - 1,
    }
    outputs = self._decode_target(
        labels, encoder_outputs, encoder_state, encoder_sequence_length
    )

    return {
        "length": tf.squeeze(predictions["length"], axis=[1]),
        "tokens": tf.squeeze(predictions["tokens"], axis=[1]),
        "alignment": tf.squeeze(predictions["alignment"], axis=[1]),
        "encoder_outputs": encoder_outputs,
        "logits": outputs["logits"],
        "index": features["index"],
    }
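# Worked example of the ids/ids_out shift used above (illustrative only,
# assuming the control tokens were added by add_sequence_controls):
#
#     ids              = [[<s>, a, b, </s>]]   with length = 4
#     labels["ids"]    = ids[:, :-1] = [[<s>, a, b]]      (decoder input)
#     labels["ids_out"] = ids[:, 1:] = [[a, b, </s>]]     (expected output)
#     labels["length"] = 4 - 1 = 3
#
# so the decoder is conditioned on the prediction itself and the returned
# "logits" align position-by-position with labels["ids_out"].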