# Example #1
# 0
    def call(self,
             source_sequence,
             target_sequence,
             encoder_mask,
             decoder_mask,
             mask_future=True,
             shift_target_sequence_right=True):
        """Run the full encoder-decoder Transformer forward pass.

        Args:
            source_sequence: [batch_size x source_length] inputs fed to the encoder.
            target_sequence: [batch_size x target_length] inputs fed to the decoder.
            encoder_mask: source padding mask in any of the shapes accepted by
                transformer_layers.convert_to_attention_mask; normalized below to
                [batch_size x source_length x source_length].
            decoder_mask: target padding mask, likewise normalized to
                [batch_size x target_length x target_length].
            mask_future: if True, the decoder is prevented from attending to
                future target positions (causal masking).
            shift_target_sequence_right: if True, the decoder shifts its input
                right by one position (standard teacher forcing).

        Returns:
            Decoder output of shape [batch_size x target_length x d_model].
        """
        # The attention masks can be passed in a number of different shapes,
        # so normalize both to full [batch x length x length] attention masks.
        encoder_mask = transformer_layers.convert_to_attention_mask(
            source_sequence, encoder_mask)
        decoder_mask = transformer_layers.convert_to_attention_mask(
            target_sequence, decoder_mask)

        # Encode the source sequence:
        # [batch_size x source_length x input_feature_shape]
        #   -> [batch_size x source_length x d_model]
        encoder_output = self.encoder(source_sequence,
                                      encoder_mask=encoder_mask)

        # Decode the target sequence against the encoder output:
        # produces [batch_size x target_length x d_model].
        decoder_output = self.decoder(
            target_sequence,
            encoder_output,
            encoder_mask=encoder_mask,
            decoder_mask=decoder_mask,
            mask_future=mask_future,
            shift_target_sequence_right=shift_target_sequence_right)

        return decoder_output
    def call(self,
             target_sequence,
             decoder_mask,
             mask_future=True,
             shift_target_sequence_right=True):
        """Run the decoder-only (GPT-style) forward pass.

        Unlike the encoder-decoder Transformer, there is no source sequence
        here: the decoder attends only over the target sequence itself.

        Args:
            target_sequence: [batch_size x target_length] inputs fed to the decoder.
            decoder_mask: target padding mask in any of the shapes accepted by
                transformer_layers.convert_to_attention_mask; normalized below to
                [batch_size x target_length x target_length].
            mask_future: if True, positions may not attend to future positions
                (causal masking).
            shift_target_sequence_right: if True, the decoder shifts its input
                right by one position (standard teacher forcing).

        Returns:
            Decoder output of shape [batch_size x target_length x d_model].
        """
        # The attention mask can be passed in a number of different shapes,
        # so normalize it to a full [batch x length x length] attention mask.
        decoder_mask = transformer_layers.convert_to_attention_mask(
            target_sequence, decoder_mask)

        # Self-attend over the target sequence only (no encoder output):
        # produces [batch_size x target_length x d_model].
        decoder_output = self.decoder(
            target_sequence,
            decoder_mask=decoder_mask,
            mask_future=mask_future,
            shift_target_sequence_right=shift_target_sequence_right)

        return decoder_output