def predict(self, input):
        """Translate one input sentence into an output query.

        The sentence is converted with ``variable_from_sentence`` (using
        ``self.lang1``), passed through ``self.encoder`` starting from the
        state returned by ``init_hidden()``, and decoded by ``beam_search``
        with ``self.decoder`` and ``self.lang2``.

        Args:
            input: The source sentence handed to ``variable_from_sentence``.

        Returns:
            The beam-search result after ``fix_parentheses`` has been applied.
        """
        source_variable = variable_from_sentence(self.lang1, input)

        # Encode the source, starting from the encoder's initial hidden state
        initial_state = self.encoder.init_hidden()
        encoder_outputs, encoded_state = self.encoder(source_variable,
                                                      initial_state)

        # Convert the hidden state returned by the encoder into the state
        # the decoder starts from
        decoder_state = self._hidden_encoder_to_decoder(encoded_state)

        # NOTE(review): the leading 5 is presumably the beam width -- confirm
        # against beam_search's signature
        decoded = beam_search(5, self.decoder, decoder_state, encoder_outputs,
                              self.lang2)
        return fix_parentheses(decoded)
Example no. 2
0
    def _translate(self, process_id, input_item, models, sess):
        """
        Run beam search for one input item and return the resulting sample.

        Only the `k` and `batch` attributes of `input_item` are read. The
        batch is passed through `prepare_data` together with an all-zero
        placeholder for the targets, and the prepared input and its mask are
        handed to `inference.beam_search` along with `models`, `sess` and `k`.

        `process_id` is accepted but not used here.
        """
        # NOTE(review): k is presumably the beam size -- confirm against
        # inference.beam_search
        beam_k = input_item.k
        batch = input_item.batch

        # prepare_data is given a target argument as well; supply one zero
        # column per batch entry as a stand-in
        placeholder_targets = numpy.zeros(shape=(len(batch), 1))
        source, source_mask, _, _ = prepare_data(batch, placeholder_targets,
                                                 maxlen=None)

        return inference.beam_search(models, sess, source, source_mask, beam_k)
    def decode_at_test(self, enc_output, cross_attn_mask, batch_size,
                       beam_size, do_sample):
        """ Builds the test-time, auto-regressive decoding graph on top of the encoder output.

        Beam search is used when `beam_size` is positive; otherwise greedy search is used and `do_sample` is
        forwarded to it. Returns the `(output_sequences, scores)` pair produced by the chosen search routine. """

        def _run_decoder_stack(step_embeddings, memories):
            """ Passes the embedded step input through every decoder layer; returns the output at the last
            position along with the (in-place updated) per-layer memories. """
            hidden = step_embeddings
            # NOTE: Self-attention runs without a mask here, since future positions are unavailable at decoding
            for layer_id in range(1, self.config.num_decoder_layers + 1):
                layer = self.decoder_stack[layer_id]
                memory_key = 'layer_{:d}'.format(layer_id)
                # Self-attention consumes and refreshes this layer's memory entry
                hidden, memories[memory_key] = layer['self_attn'].forward(
                    hidden, None, None, memories[memory_key])
                # Cross-attention over the encoder output; its second return value is discarded
                hidden, _ = layer['cross_attn'].forward(
                    hidden, enc_output, cross_attn_mask)
                hidden = layer['ffn'].forward(hidden)
            # Keep only the last position along axis 1, as expected by the inference pipeline
            return hidden[:, -1, :], memories

        def _embed_step_targets(step_target_ids, current_time_step):
            """ Embeds the step's target ids, adds the positional signal for the current time-step and applies
            embedding dropout when it is configured. """
            embedded = self._embed(step_target_ids)
            # positional_signal is bound further down, before any search routine invokes this closure
            embedded += positional_signal[:, current_time_step - 1:current_time_step, :]
            if self.config.dropout_embeddings > 0:
                embedded = tf.layers.dropout(embedded,
                                             rate=self.config.dropout_embeddings,
                                             training=self.training)
            return embedded

        def _decoding_function(step_target_ids, current_time_step, memories):
            """ Produces the logits for the next target-side token, plus the updated memories. """
            step_embeddings = _embed_step_targets(step_target_ids, current_time_step)
            # Combine the encoder context with the embedded step input in the decoder stack
            last_output, memories = _run_decoder_stack(step_embeddings, memories)
            # Project the decoder output with the soft-max projection layer
            return self.softmax_projection_layer.project(last_output), memories

        with tf.variable_scope(self.name):
            # Create nodes
            self._build_graph()

            positional_signal = get_positional_signal(self.config.translation_max_len,
                                                      self.config.embedding_size,
                                                      self.float_dtype)
            # NOTE(review): the literal 0 handed to both search routines is presumably the end-of-sequence id
            # -- confirm against their signatures
            if beam_size > 0:
                # Every sequence starts from <GO> (id 1); one id per batch entry
                start_ids = tf.cast(tf.fill([batch_size], 1), dtype=self.int_dtype)
                start_memories = self._get_initial_memories(batch_size, beam_size=beam_size)
                search_result = beam_search(_decoding_function,
                                            start_ids,
                                            start_memories,
                                            self.int_dtype,
                                            self.float_dtype,
                                            self.config.translation_max_len,
                                            batch_size,
                                            beam_size,
                                            self.embedding_layer.get_vocab_size(),
                                            0,
                                            self.config.length_normalization_alpha)
            else:
                # Every sequence starts from <GO> (id 1); here the ids carry an extra axis of size one
                start_ids = tf.cast(tf.fill([batch_size, 1], 1), dtype=self.int_dtype)
                start_memories = self._get_initial_memories(batch_size, beam_size=1)
                search_result = greedy_search(_decoding_function,
                                              start_ids,
                                              start_memories,
                                              self.int_dtype,
                                              self.float_dtype,
                                              self.config.translation_max_len,
                                              batch_size,
                                              0,
                                              do_sample,
                                              time_major=False)
            output_sequences, scores = search_result
        return output_sequences, scores