Example 1
    def testAddSequenceControls(self, ids, length, start_id, end_id,
                                expected_ids, expected_length):
        ids = tf.constant(ids, dtype=tf.int64)
        length = tf.constant(length, dtype=tf.int32)
        # Add the start/end control tokens and check the updated ids and lengths.
        ids, length = inputters.add_sequence_controls(
            ids, length, start_id=start_id, end_id=end_id)
        self.assertAllEqual(self.evaluate(ids), expected_ids)
        self.assertAllEqual(self.evaluate(length), expected_length)
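For reference, a minimal usage sketch of the `inputters.add_sequence_controls` call exercised by this test. It assumes `inputters` refers to `opennmt.inputters` as in OpenNMT-tf; the concrete ids, the start/end values 1 and 2, and the padding of the returned batch are illustrative assumptions, not values taken from the test.

import tensorflow as tf
from opennmt import inputters

# Two padded sequences with true lengths 3 and 2 (values are hypothetical).
ids = tf.constant([[3, 4, 5, 0], [6, 7, 0, 0]], dtype=tf.int64)
length = tf.constant([3, 2], dtype=tf.int32)

# Prepend the assumed start id (1) and append the assumed end id (2);
# each length should grow by 2.
ids, length = inputters.add_sequence_controls(ids, length, start_id=1, end_id=2)
print(length.numpy())  # expected: [5 4]
print(ids.numpy())     # expected: [[1 3 4 5 2] [1 6 7 2 0]], assuming the end
                       # id is inserted at each sequence's true length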
Example 2
    def analyze(self, features):
        # Encode the source.
        source_length = self.features_inputter.get_length(features)
        source_inputs = self.features_inputter(features)
        encoder_outputs, encoder_state, encoder_sequence_length = self.encoder(
            source_inputs, sequence_length=source_length)

        # Decode the target from the encoder outputs.
        predictions = self._dynamic_decode(features, encoder_outputs,
                                           encoder_state,
                                           encoder_sequence_length)

        # Remove the hypotheses dimension and blank out the </s> token.
        length = predictions["length"]
        length = tf.squeeze(length, axis=[1])
        tokens = predictions["tokens"]
        tokens = tf.squeeze(tokens, axis=[1])
        tokens = tf.where(tf.equal(tokens, "</s>"),
                          tf.fill(tf.shape(tokens), ""), tokens)

        # Map tokens back to ids and re-add the start/end control tokens
        # expected by the target inputter.
        ids = self.labels_inputter.tokens_to_ids.lookup(tokens)
        if self.labels_inputter.mark_start or self.labels_inputter.mark_end:
            start_id = (START_OF_SENTENCE_ID
                        if self.labels_inputter.mark_start else None)
            end_id = (END_OF_SENTENCE_ID
                      if self.labels_inputter.mark_end else None)
            ids, length = add_sequence_controls(
                ids, length, start_id=start_id, end_id=end_id)
        # Shift the sequence: "ids" are the decoder inputs, "ids_out" the
        # expected outputs, both of length - 1.
        labels = {
            "ids_out": ids[:, 1:],
            "ids": ids[:, :-1],
            "length": length - 1
        }

        # Decode again with the predicted target as input to get its logits.
        outputs = self._decode_target(labels, encoder_outputs, encoder_state,
                                      encoder_sequence_length)

        return {
            "length": tf.squeeze(predictions["length"], axis=[1]),
            "tokens": tf.squeeze(predictions["tokens"], axis=[1]),
            "alignment": tf.squeeze(predictions["alignment"], axis=[1]),
            "encoder_outputs": encoder_outputs,
            "logits": outputs["logits"],
            "index": features["index"],
        }
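The `labels` construction above uses the standard teacher-forcing shift: once the control tokens are in place, the decoder input drops the last token and the expected output drops the first one. A standalone sketch of that shift, with hypothetical ids (1 and 2 standing in for the start and end tokens):

import tensorflow as tf

# A single target sequence with start (1) and end (2) ids already added:
# <s> w1 w2 w3 </s>
ids = tf.constant([[1, 7, 8, 9, 2]], dtype=tf.int64)
length = tf.constant([5], dtype=tf.int32)

labels = {
    "ids": ids[:, :-1],      # decoder inputs:   <s> w1 w2 w3
    "ids_out": ids[:, 1:],   # expected outputs: w1 w2 w3 </s>
    "length": length - 1,    # both views contain length - 1 tokens
}
print(labels["ids"].numpy())      # [[1 7 8 9]]
print(labels["ids_out"].numpy())  # [[7 8 9 2]]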