def __init__(self, args):
    """Assemble the attention and feed-forward sub-layers of this block.

    Args:
        args: Configuration object. ``num_heads``, ``hidden_units`` and
            ``dropout`` are read here, and the object itself is forwarded
            to both ``SublayerConnection`` instances.
    """
    super().__init__()
    head_count = args.num_heads
    width = args.hidden_units
    drop_p = args.dropout

    self.attention = MultiHeadedAttention(
        h=head_count, d_model=width, dropout=drop_p
    )
    # The feed-forward inner width is the model width itself rather than
    # ``hidden * 4``: H->H->H instead of H->4H->H in TiSASRec.
    self.feed_forward = PositionwiseFeedForward(
        d_model=width, d_ff=width, dropout=drop_p, act='relu'
    )
    # Sub-modules are created in this fixed order (attention, feed-forward,
    # input wrapper, output wrapper); keep it stable when editing.
    self.input_sublayer = SublayerConnection(
        args=args, size=width, dropout=drop_p
    )
    self.output_sublayer = SublayerConnection(
        args=args, size=width, dropout=drop_p
    )
def __init__(self, args):
    """Create the attention, feed-forward, normalisation and dropout layers.

    Args:
        args: Configuration object; ``num_heads``, ``hidden_units`` and
            ``dropout`` are the only fields read here.
    """
    super().__init__()
    head_count = args.num_heads
    width = args.hidden_units
    drop_p = args.dropout

    self.attention = MultiHeadedAttention(
        h=head_count, d_model=width, dropout=drop_p
    )
    # The feed-forward inner width is the model width itself rather than
    # ``4 * hidden``: H->H->H instead of H->4H->H in PFF.
    self.feed_forward = PositionwiseFeedForward(
        d_model=width, d_ff=width, dropout=drop_p, act='relu'
    )
    # Two independent LayerNorm modules over the hidden dimension, followed
    # by a single Dropout module using the configured rate.
    self.norm1 = nn.LayerNorm(width)
    self.norm2 = nn.LayerNorm(width)
    self.dropout = nn.Dropout(p=drop_p)
def __init__(self, args, La, Lr):
    """Build the mixed-attention and feed-forward sub-layers of this block.

    Args:
        args: Configuration object. ``hidden_units`` and ``dropout`` are
            read here; the object is also forwarded to ``MixedAttention``
            and to both ``SublayerConnection`` instances.
        La: Forwarded unchanged to ``MixedAttention`` (meaning is defined
            there, not in this constructor).
        Lr: Forwarded unchanged to ``MixedAttention`` (meaning is defined
            there, not in this constructor).
    """
    super().__init__()
    width = args.hidden_units
    ffn_width = width * 4  # feed-forward inner size is 4x the hidden size
    drop_p = args.dropout

    self.attention = MixedAttention(args, La, Lr)
    # No ``act`` argument is passed, so PositionwiseFeedForward uses
    # whatever activation it defaults to.
    self.feed_forward = PositionwiseFeedForward(
        d_model=width, d_ff=ffn_width, dropout=drop_p
    )
    self.input_sublayer = SublayerConnection(
        args=args, size=width, dropout=drop_p
    )
    self.output_sublayer = SublayerConnection(
        args=args, size=width, dropout=drop_p
    )
    self.dropout = nn.Dropout(p=drop_p)
def __init__(self, args):
    """Build the attention and GELU feed-forward sub-layers of this block.

    Args:
        args: Configuration object. ``num_heads``, ``hidden_units`` and
            ``dropout`` are read here; the object is forwarded to both
            ``SublayerConnection`` instances and also kept on the
            instance as ``self.args``.
    """
    super().__init__()
    head_count = args.num_heads
    width = args.hidden_units
    ffn_width = 4 * width  # feed-forward inner size is 4x the hidden size
    drop_p = args.dropout

    self.attention = MultiHeadedAttention(
        h=head_count, d_model=width, dropout=drop_p
    )
    self.feed_forward = PositionwiseFeedForward(
        d_model=width,
        d_ff=ffn_width,
        dropout=drop_p,
        act='gelu',
        middle_drop=False,
    )
    self.input_sublayer = SublayerConnection(
        args=args, size=width, dropout=drop_p
    )
    self.output_sublayer = SublayerConnection(
        args=args, size=width, dropout=drop_p
    )
    self.dropout = nn.Dropout(p=drop_p)
    # Keep the full configuration available to other methods of the class.
    self.args = args