def __init__(self, args):
    super().__init__()
    attn_heads = args.num_heads
    hidden = args.hidden_units
    # feed_forward_hidden = hidden * 4
    feed_forward_hidden = hidden  # H->H->H instead of the H->4H->H expansion used in TiSASRec
    dropout = args.dropout
    # multi-head self-attention over the hidden dimension
    self.attention = MultiHeadedAttention(h=attn_heads, d_model=hidden, dropout=dropout)
    # position-wise feed-forward with ReLU activation
    self.feed_forward = PositionwiseFeedForward(d_model=hidden, d_ff=feed_forward_hidden, dropout=dropout, act='relu')
    # residual sublayer wrappers around the attention and feed-forward modules
    self.input_sublayer = SublayerConnection(args=args, size=hidden, dropout=dropout)
    self.output_sublayer = SublayerConnection(args=args, size=hidden, dropout=dropout)
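Only the constructor appears on this page; the forward pass that wires these modules together is defined elsewhere in the source. As a rough, self-contained sketch of the usual wiring (two residual sublayers, one wrapping self-attention and one wrapping the position-wise feed-forward), something like the following works. ResidualSublayer, SimpleTransformerBlock, and torch.nn.MultiheadAttention are illustrative stand-ins, not the repo's SublayerConnection / MultiHeadedAttention classes, and the pre-norm residual form is only one common choice for SublayerConnection.

import torch
import torch.nn as nn

class ResidualSublayer(nn.Module):
    """Hypothetical stand-in for SublayerConnection: pre-norm residual + dropout."""
    def __init__(self, size, dropout):
        super().__init__()
        self.norm = nn.LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        # x + dropout(sublayer(LayerNorm(x)))
        return x + self.dropout(sublayer(self.norm(x)))

class SimpleTransformerBlock(nn.Module):
    """Illustrative block using the H->H->H feed-forward shape from the snippet above."""
    def __init__(self, hidden=64, attn_heads=2, dropout=0.1):
        super().__init__()
        self.attention = nn.MultiheadAttention(hidden, attn_heads,
                                               dropout=dropout, batch_first=True)
        self.feed_forward = nn.Sequential(
            nn.Linear(hidden, hidden),  # d_ff == d_model, matching feed_forward_hidden = hidden
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, hidden),
        )
        self.input_sublayer = ResidualSublayer(hidden, dropout)
        self.output_sublayer = ResidualSublayer(hidden, dropout)

    def forward(self, x):
        # attention sublayer, then feed-forward sublayer, each with a residual connection
        x = self.input_sublayer(x, lambda t: self.attention(t, t, t, need_weights=False)[0])
        return self.output_sublayer(x, self.feed_forward)

x = torch.randn(8, 50, 64)                # (batch, seq_len, hidden)
print(SimpleTransformerBlock()(x).shape)  # torch.Size([8, 50, 64])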
Example #2

def __init__(self, args):
    super().__init__()
    attn_heads = args.num_heads
    hidden = args.hidden_units
    # feed_forward_hidden = 4 * hidden
    feed_forward_hidden = hidden  # H->H->H instead of the usual H->4H->H expansion in the PFF
    dropout = args.dropout
    self.attention = MultiHeadedAttention(h=attn_heads, d_model=hidden, dropout=dropout)
    self.feed_forward = PositionwiseFeedForward(d_model=hidden, d_ff=feed_forward_hidden, dropout=dropout, act='relu')
    # explicit layer norms and dropout instead of SublayerConnection wrappers
    self.norm1 = nn.LayerNorm(hidden)
    self.norm2 = nn.LayerNorm(hidden)
    self.dropout = nn.Dropout(p=dropout)
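This variant keeps the LayerNorm and dropout modules explicit rather than delegating to SublayerConnection, which suggests a post-norm residual wiring in the (not shown) forward pass: add the residual, then normalize, as in SASRec-style blocks. A minimal, self-contained sketch of that wiring is below; torch.nn.MultiheadAttention and a plain nn.Sequential stand in for the source's MultiHeadedAttention and PositionwiseFeedForward, and the exact forward logic is an assumption.

import torch
import torch.nn as nn

class PostNormBlock(nn.Module):
    """Illustrative post-norm block matching the explicit norm1/norm2/dropout above."""
    def __init__(self, hidden=64, attn_heads=2, dropout=0.1):
        super().__init__()
        self.attention = nn.MultiheadAttention(hidden, attn_heads,
                                               dropout=dropout, batch_first=True)
        self.feed_forward = nn.Sequential(
            nn.Linear(hidden, hidden), nn.ReLU(), nn.Linear(hidden, hidden))
        self.norm1 = nn.LayerNorm(hidden)
        self.norm2 = nn.LayerNorm(hidden)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        # residual + dropout, then LayerNorm (post-norm), for each sublayer
        attn_out, _ = self.attention(x, x, x, need_weights=False)
        x = self.norm1(x + self.dropout(attn_out))
        return self.norm2(x + self.dropout(self.feed_forward(x)))

print(PostNormBlock()(torch.randn(4, 20, 64)).shape)  # torch.Size([4, 20, 64])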
Example #3
def __init__(self, args, La, Lr):
    super().__init__()

    hidden = args.hidden_units
    feed_forward_hidden = hidden * 4  # standard H->4H->H expansion here
    dropout = args.dropout
    # MixedAttention takes La and Lr in addition to args (both defined elsewhere in the source)
    self.attention = MixedAttention(args, La, Lr)
    self.feed_forward = PositionwiseFeedForward(d_model=hidden,
                                                d_ff=feed_forward_hidden,
                                                dropout=dropout)
    self.input_sublayer = SublayerConnection(args=args,
                                             size=hidden,
                                             dropout=dropout)
    self.output_sublayer = SublayerConnection(args=args,
                                              size=hidden,
                                              dropout=dropout)
    self.dropout = nn.Dropout(p=dropout)
Example #4
def __init__(self, args):
    super().__init__()
    attn_heads = args.num_heads
    hidden = args.hidden_units
    feed_forward_hidden = 4 * hidden  # standard H->4H->H expansion
    dropout = args.dropout
    self.attention = MultiHeadedAttention(h=attn_heads,
                                          d_model=hidden,
                                          dropout=dropout)
    # GELU activation instead of ReLU; middle_drop=False is passed through to the PFF
    self.feed_forward = PositionwiseFeedForward(d_model=hidden,
                                                d_ff=feed_forward_hidden,
                                                dropout=dropout,
                                                act='gelu',
                                                middle_drop=False)
    self.input_sublayer = SublayerConnection(args=args,
                                             size=hidden,
                                             dropout=dropout)
    self.output_sublayer = SublayerConnection(args=args,
                                              size=hidden,
                                              dropout=dropout)
    self.dropout = nn.Dropout(p=dropout)
    self.args = args
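All four constructors read the same few hyperparameters from args: num_heads, hidden_units, and dropout (plus whatever the attention and sublayer modules pull from it). To experiment with snippets like these outside their original training scripts, a throwaway namespace is enough; the values below are arbitrary placeholders, not defaults from any of the source repositories.

from types import SimpleNamespace

# hypothetical stand-in for the argparse namespace these constructors expect
args = SimpleNamespace(num_heads=2, hidden_units=64, dropout=0.1)
print(args.num_heads, args.hidden_units, args.dropout)  # 2 64 0.1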