Example #1
    def __init__(self, SRC: data.Field, TGT: data.Field):
        """
        :param SRC: the trained torchtext.data.Field object containing the source side vocabulary
        :param TGT: the trained torchtext.data.Field object containing the target side vocabulary
        """
        super(Transformer, self).__init__()
        self.SRC = SRC
        self.TGT = TGT

        # #################################### Parameter Initialization ################################################
        d_model = int(cfg.transformer_d_model)
        h = int(cfg.transformer_h)
        dropout = float(cfg.transformer_dropout)
        d_ff = int(cfg.transformer_d_ff)
        max_len = int(cfg.transformer_max_len)
        N = int(cfg.transformer_N)
        loss_smoothing = float(cfg.transformer_loss_smoothing)

        # #################################### Loss Function Initialization ############################################
        self.criterion = LabelSmoothing(
            size=len(TGT.vocab),
            padding_idx=TGT.vocab.stoi[cfg.pad_token],
            smoothing=loss_smoothing)

        # #################################### ENCODER INITIALIZATION ##################################################
        c = copy.deepcopy
        attn = MultiHeadedAttention(h, d_model)
        ff = PositionwiseFeedForward(d_model, d_ff, dropout)
        encoder_layer = EncoderLayer(d_model, c(attn), c(ff), dropout)
        self.enc_layers = clones(encoder_layer, N)
        self.enc_norm = LayerNorm(encoder_layer.size)

        # #################################### DECODER INITIALIZATION ##################################################
        position = PositionalEncoding(d_model, dropout, max_len)
        decoder_layer = DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout)
        self.dec_layers = clones(decoder_layer, N)
        self.dec_norm = LayerNorm(decoder_layer.size)

        # #################################### EMBEDDINGS INITIALIZATION ###############################################
        self.src_embed = nn.Sequential(Embeddings(d_model, len(SRC.vocab)),
                                       c(position))
        self.tgt_embed = nn.Sequential(Embeddings(d_model, len(TGT.vocab)),
                                       c(position))
        # #################################### GENERATOR INITIALIZATION ################################################
        self.generator = Generator(d_model, len(TGT.vocab))

        # #################################### BEAM SEARCH PARAMETERS ##################################################
        self.beam_search_decoding = False
        self.beam_size = int(cfg.beam_size)
        self.beam_search_length_norm_factor = float(
            cfg.beam_search_length_norm_factor)
        self.beam_search_coverage_penalty_factor = float(
            cfg.beam_search_coverage_penalty_factor)
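Example #1 assembles the encoder and decoder stacks with clones(...) plus a trailing LayerNorm, but the snippet does not show how those stacks are consumed. Below is a minimal sketch, assuming the helpers follow the Annotated Transformer conventions; the clones body and the encode/decode helpers are illustrative assumptions, not code taken from this repository.

import copy
import torch.nn as nn

def clones(module, N):
    # N independent deep copies of a module, registered as a ModuleList
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

# Illustrative use of the stacks built in __init__ (function names are assumptions):
def encode(model, src, src_mask):
    x = model.src_embed(src)              # token embeddings + positional encoding
    for layer in model.enc_layers:
        x = layer(x, src_mask)
    return model.enc_norm(x)              # final layer norm over the encoder output

def decode(model, memory, src_mask, tgt, tgt_mask):
    x = model.tgt_embed(tgt)
    for layer in model.dec_layers:
        x = layer(x, memory, src_mask, tgt_mask)
    return model.dec_norm(x)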
Example #2
    def __init__(self, size, self_attn, src_attn, feed_forward, dropout):
        super(DecoderLayer, self).__init__()
        self.size = size
        self.self_attn = self_attn
        self.src_attn = src_attn
        self.feed_forward = feed_forward
        # one residual SublayerConnection each for self-attention, source attention, and the feed-forward block
        self.sublayer = clones(SublayerConnection(size, dropout), 3)
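The three SublayerConnection clones correspond to the three residual blocks of a Transformer decoder layer. A hedged sketch of the matching forward pass, following the standard Annotated Transformer pattern (the exact signature in this repository may differ):

    def forward(self, x, memory, src_mask, tgt_mask):
        m = memory
        # 1) masked self-attention over the target sequence
        x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, tgt_mask))
        # 2) encoder-decoder (source) attention over the encoder memory
        x = self.sublayer[1](x, lambda x: self.src_attn(x, m, m, src_mask))
        # 3) position-wise feed-forward network
        return self.sublayer[2](x, self.feed_forward)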
Example #3
    def __init__(self, h, d_model, dropout=0.1):
        """
        Implements Figure 2 (right) of the paper (https://arxiv.org/pdf/1706.03762.pdf)
        """
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        # We assume d_v always equals d_k
        self.d_k = d_model // h
        self.h = h
        # four projections: query, key, value, and the final output after head concatenation
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)
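The four linear layers project queries, keys, values, and the concatenated heads; the attention itself is the scaled dot-product from the paper. A minimal sketch of that function, assuming the usual formulation (not copied from this repository):

import math
import torch

def attention(query, key, value, mask=None, dropout=None):
    # Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = torch.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn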
Example #4
    def __init__(self, use_left_over_vector=False, value_from_token_embedding=False):
        super(MultiHeadAspectAugmentationLayer, self).__init__()
        self.d_model = int(cfg.transformer_d_model)
        dropout = float(cfg.transformer_dropout)
        self.sublayer = clones(SublayerConnection(self.d_model, dropout), 2)
        # Placeholders assigned after construction (e.g. once the BERT language model is loaded)
        self.aspect_attn = None
        self.bert_lm = None
        self.aspect_vectors = None
        self.bert_weights_for_average_pooling = None
        self.use_left_over_vector = use_left_over_vector
        print("Aspect multi-head attention will{} use the left-over aspect vector".format(
            "" if use_left_over_vector else " not"))
        self.softmax = nn.Softmax(dim=-1)
        self.value_from_token_embedding = value_from_token_embedding
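Both Example #2 and Example #4 wrap their sub-modules in SublayerConnection clones. A hedged sketch of what such a residual wrapper typically looks like, using the pre-norm variant from the Annotated Transformer with PyTorch's built-in nn.LayerNorm; the repository's own SublayerConnection and LayerNorm may differ in detail.

import torch.nn as nn

class SublayerConnection(nn.Module):
    # Residual connection around an arbitrary sublayer, with layer norm applied first
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = nn.LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        return x + self.dropout(sublayer(self.norm(x)))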