def __init__(self, d_model_M1, d_model_M2, d_model, dout_p, H, d_ff_M1, d_ff_M2):
    """Create the sub-modules of one bi-modal encoder layer.

    For each of the two streams (``M1`` and ``M2``) this builds:

    * a self-attention module whose three size arguments are all the
      stream's own size;
    * a "bi-modal" attention module whose first size argument is the
      stream's own size and whose second and third are the other
      stream's size;
    * a position-wise feed-forward module;
    * three clones of a residual connection.

    Args:
        d_model_M1: Size passed to the M1 attention, feed-forward and
            residual modules (presumably the M1 feature dimension --
            confirm against ``MultiheadedAttention``).
        d_model_M2: Same as ``d_model_M1`` but for the M2 stream.
        d_model: Forwarded as the last positional argument of every
            ``MultiheadedAttention``; its meaning is defined there.
        dout_p: Forwarded to every attention, feed-forward and residual
            module (presumably a dropout probability -- confirm).
        H: Forwarded as the fourth positional argument of every
            ``MultiheadedAttention`` (presumably the head count --
            confirm).
        d_ff_M1: Second argument of the M1 ``PositionwiseFeedForward``.
        d_ff_M2: Second argument of the M2 ``PositionwiseFeedForward``.
    """
    super(BiModalEncoderLayer, self).__init__()

    def build_attention(d_first, d_rest):
        # All four attention modules use one size for their 2nd and 3rd
        # arguments and share H, dout_p and d_model.
        return MultiheadedAttention(d_first, d_rest, d_rest, H, dout_p, d_model)

    # NOTE(review): construction order is deliberately left untouched; if
    # the base class is a torch ``nn.Module`` it determines sub-module
    # registration order (and the order random initialisation is drawn).
    self.self_att_M1 = build_attention(d_model_M1, d_model_M1)
    self.self_att_M2 = build_attention(d_model_M2, d_model_M2)
    self.bi_modal_att_M1 = build_attention(d_model_M1, d_model_M2)
    self.bi_modal_att_M2 = build_attention(d_model_M2, d_model_M1)

    self.feed_forward_M1 = PositionwiseFeedForward(d_model_M1, d_ff_M1, dout_p)
    self.feed_forward_M2 = PositionwiseFeedForward(d_model_M2, d_ff_M2, dout_p)

    # One prototype residual connection per stream, cloned three times.
    residual_M1 = ResidualConnection(d_model_M1, dout_p)
    self.res_layers_M1 = clone(residual_M1, 3)
    residual_M2 = ResidualConnection(d_model_M2, dout_p)
    self.res_layers_M2 = clone(residual_M2, 3)
def __init__(self, d_model, dout_p, H, d_ff, N):
    """Create the encoder as ``N`` clones of a single ``EncoderLayer``.

    Args:
        d_model: First argument forwarded to ``EncoderLayer``.
        dout_p: Second argument forwarded to ``EncoderLayer``.
        H: Third argument forwarded to ``EncoderLayer``.
        d_ff: Fourth argument forwarded to ``EncoderLayer``.
        N: Count handed to ``clone`` (presumably the number of stacked
            layers -- confirm against ``clone``).
    """
    super(Encoder, self).__init__()
    # Build one prototype layer, then let ``clone`` replicate it.
    prototype_layer = EncoderLayer(d_model, dout_p, H, d_ff)
    self.enc_layers = clone(prototype_layer, N)
def __init__(self, d_model, dout_p, H, d_ff):
    """Create the sub-modules of one encoder layer.

    Builds two cloned residual connections, one self-attention module
    and one position-wise feed-forward module.

    Args:
        d_model: Size passed to the residual, attention and feed-forward
            modules (presumably the feature dimension -- confirm).
        dout_p: Forwarded to ``ResidualConnection`` only (see the review
            note below).
        H: Fourth positional argument of ``MultiheadedAttention``
            (presumably the head count -- confirm).
        d_ff: Second argument of ``PositionwiseFeedForward``.
    """
    super(EncoderLayer, self).__init__()

    # One prototype residual connection, cloned twice.
    residual = ResidualConnection(d_model, dout_p)
    self.res_layers = clone(residual, 2)

    # NOTE(review): ``dout_p`` is not forwarded to the two modules below.
    # The attention is built without a dropout argument and the
    # feed-forward receives a hard-coded 0.0. Confirm this is intentional.
    self.self_att = MultiheadedAttention(d_model, d_model, d_model, H)
    self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dout_p=0.0)