def __init__(self, args):
    super(FFAndNorm, self).__init__()

    self.ffn = FFN(args)
    self.norm1 = LayerNorm(args.hidden_size)
    self.dropout2 = nn.Dropout(args.dropout_r)
    self.norm2 = LayerNorm(args.hidden_size)
def __init__(self, d_model=512, n_heads=8, d_ff=2048, dropout=0.1):
    super(ReZeroTransformerLayer, self).__init__()

    self.norm1 = LayerNorm(d_model)
    self.self_attn = MultiHeadedAttention(n_heads, d_model)
    self.ffn = PositionwiseFeedForward(d_model, d_ff, dropout)
    self.norm2 = LayerNorm(d_model)
    # ReZero residual weight, initialized to zero so each sublayer
    # starts out as the identity mapping
    self.resweight = nn.Parameter(torch.tensor(0.0))
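# Hedged sketch: the constructor above does not show the forward pass. In the
# standard ReZero formulation each sublayer output is scaled by `resweight`
# before the residual addition; applying `norm1`/`norm2` pre-sublayer, and the
# MultiHeadedAttention call signature, are assumptions here.
def forward(self, x, mask=None):
    # Self-attention sublayer: x + resweight * Attn(norm(x))
    h = self.norm1(x)
    x = x + self.resweight * self.self_attn(h, h, h, mask)
    # Feed-forward sublayer: x + resweight * FFN(norm(x))
    x = x + self.resweight * self.ffn(self.norm2(x))
    return x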
def __init__(self, d_model, ffn_hidden, n_head, drop_prob):
    super(EncoderLayer, self).__init__()

    self.attention = MultiHeadAttention(d_model=d_model, n_head=n_head)
    self.norm1 = LayerNorm(d_model=d_model)
    self.dropout1 = nn.Dropout(p=drop_prob)

    self.ffn = PositionwiseFeedForward(d_model=d_model, hidden=ffn_hidden, drop_prob=drop_prob)
    self.norm2 = LayerNorm(d_model=d_model)
    self.dropout2 = nn.Dropout(p=drop_prob)
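# Hedged sketch of a matching forward pass, assuming the conventional
# post-norm residual wiring (sublayer -> dropout -> add -> LayerNorm);
# the `src_mask` argument and keyword names are illustrative.
def forward(self, x, src_mask=None):
    # Self-attention block with residual connection
    _x = x
    x = self.attention(q=x, k=x, v=x, mask=src_mask)
    x = self.norm1(_x + self.dropout1(x))

    # Position-wise feed-forward block with residual connection
    _x = x
    x = self.ffn(x)
    x = self.norm2(_x + self.dropout2(x))
    return x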
def __init__(self, args):
    super(SA, self).__init__()

    self.mhatt = MHAtt(args)
    self.ffn = FFN(args)

    self.dropout1 = nn.Dropout(args.dropout_r)
    self.norm1 = LayerNorm(args.hidden_size)

    self.dropout2 = nn.Dropout(args.dropout_r)
    self.norm2 = LayerNorm(args.hidden_size)
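# Hedged sketch: SA looks like an MCAN-style self-attention block, so a
# plausible forward applies post-norm residuals around `mhatt` and `ffn`;
# the `x_mask` argument and MHAtt argument order are assumptions.
def forward(self, x, x_mask=None):
    x = self.norm1(x + self.dropout1(self.mhatt(x, x, x, x_mask)))
    x = self.norm2(x + self.dropout2(self.ffn(x)))
    return x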
def __init__(self, args, i, shift=False):
    super(LA_Block, self).__init__()
    self.args = args

    self.sa1 = SA(args)
    self.sa3 = SGA(args, shift)

    self.last = (i == args.layer - 1)
    if not self.last:
        self.att_lang = AttFlat(args, args.lang_seq_len, merge=False)
        self.att_audio = AttFlat(args, args.audio_seq_len, merge=False)
        self.norm_l = LayerNorm(args.hidden_size)
        self.norm_i = LayerNorm(args.hidden_size)
        self.dropout = nn.Dropout(args.dropout_r)
def __init__(self, args, shift=False):
    super(SGA, self).__init__()

    self.mhatt1 = MHAtt(args, shift)
    self.mhatt2 = MHAtt(args, shift)
    self.ffn = FFN(args)

    self.dropout1 = nn.Dropout(args.dropout_r)
    self.norm1 = LayerNorm(args.hidden_size)

    self.dropout2 = nn.Dropout(args.dropout_r)
    self.norm2 = LayerNorm(args.hidden_size)

    self.dropout3 = nn.Dropout(args.dropout_r)
    self.norm3 = LayerNorm(args.hidden_size)
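# Hedged sketch: SGA mirrors MCAN's guided-attention block, where `mhatt1`
# attends within x and `mhatt2` attends from x to the guiding features y;
# the MHAtt argument order (value, key, query, mask) is an assumption.
def forward(self, x, y, x_mask=None, y_mask=None):
    x = self.norm1(x + self.dropout1(self.mhatt1(x, x, x, x_mask)))
    x = self.norm2(x + self.dropout2(self.mhatt2(y, y, x, y_mask)))
    x = self.norm3(x + self.dropout3(self.ffn(x)))
    return x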
def __init__(self, args, vocab_size, pretrained_emb):
    super(MCA, self).__init__()
    self.args = args

    self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=args.word_embed_size)
    # Loading the GloVe embedding weights
    self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.lstm = nn.LSTM(input_size=args.word_embed_size, hidden_size=args.hidden_size, num_layers=1, batch_first=True)
    self.adapter = nn.Linear(args.audio_feat_size, args.hidden_size)

    self.enc_list = nn.ModuleList([SA(args) for _ in range(args.layer)])
    self.dec_list = nn.ModuleList([SGA(args) for _ in range(args.layer)])

    # Flattening
    self.attflat_img = AttFlat(args)
    self.attflat_lang = AttFlat(args)

    # Classification layers
    self.proj_norm = LayerNorm(2 * args.hidden_size)
    if self.args.task_binary:
        self.proj = nn.Linear(2 * args.hidden_size, 2)
    else:
        self.proj = nn.Linear(2 * args.hidden_size, 7)
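# Hedged sketch of the overall flow implied by the constructor: embed and
# encode the language sequence, project audio features to hidden size, run
# the SA encoder and SGA decoder stacks, flatten both streams, and classify.
# Mask handling, the AttFlat call signature, and AttFlat producing
# 2 * hidden_size features are all assumptions here.
def forward(self, x, y, x_mask=None, y_mask=None):
    # Language stream: GloVe embedding -> LSTM
    lang_feat = self.embedding(x)
    lang_feat, _ = self.lstm(lang_feat)

    # Audio stream: linear adapter to hidden size
    audio_feat = self.adapter(y)

    # Encoder over language, decoder attending from audio to language
    for enc in self.enc_list:
        lang_feat = enc(lang_feat, x_mask)
    for dec in self.dec_list:
        audio_feat = dec(audio_feat, lang_feat, y_mask, x_mask)

    # Flatten each stream to a vector, fuse, and classify
    lang_flat = self.attflat_lang(lang_feat, x_mask)
    audio_flat = self.attflat_img(audio_feat, y_mask)
    proj_feat = self.proj_norm(lang_flat + audio_flat)
    return self.proj(proj_feat)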
def __init__(self, args, vocab_size, pretrained_emb):
    super(Model_MAT, self).__init__()
    self.args = args

    # LSTM
    self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=args.word_embed_size)
    # Loading the GloVe embedding weights
    self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.input_drop = nn.Dropout(args.dropout_i)
    self.lstm_x = nn.LSTM(input_size=args.word_embed_size, hidden_size=args.hidden_size, num_layers=1, batch_first=True)
    self.lstm_y = nn.LSTM(input_size=args.audio_feat_size, hidden_size=args.hidden_size, num_layers=1, batch_first=True)
    # self.adapter = nn.Linear(args.audio_feat_size, args.hidden_size)

    self.backbone = MCA_ED(args)

    # Flatten to vector
    self.attflat_img = AttFlat(args)
    self.attflat_lang = AttFlat(args)

    # Classification layers
    self.proj_norm = LayerNorm(2 * args.hidden_size)
    self.proj = nn.Linear(2 * args.hidden_size, args.ans_size)
    self.proj_drop = nn.Dropout(args.dropout_o)
def __init__(self, d_model, n_vocab):
    super(MLMLossHead, self).__init__()
    self.d_model = d_model
    self.n_vocab = n_vocab

    self.cls = nn.Sequential(
        nn.Linear(d_model, d_model),
        nn.GELU(),
        LayerNorm(d_model),
        nn.Linear(d_model, n_vocab, bias=False),
    )
    self.loss_fn = nn.CrossEntropyLoss()
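# Hedged sketch: a typical MLM head maps hidden states to vocabulary logits
# and scores them against the masked-token labels; the label convention
# (e.g. an ignore index for unmasked positions) is an assumption here.
def forward(self, hidden_states, labels):
    # (batch, seq_len, d_model) -> (batch, seq_len, n_vocab)
    logits = self.cls(hidden_states)
    # CrossEntropyLoss expects (N, C) logits against (N,) targets
    loss = self.loss_fn(logits.view(-1, self.n_vocab), labels.view(-1))
    return loss, logits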
def __init__(self, args, vocab_size, pretrained_emb):
    super(Model_LAV, self).__init__()
    self.args = args

    # LSTM
    self.embedding = nn.Embedding(
        num_embeddings=vocab_size,
        embedding_dim=args.word_embed_size
    )
    # Loading the GloVe embedding weights
    self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.lstm_x = nn.LSTM(
        input_size=args.word_embed_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        batch_first=True
    )
    # self.lstm_y = nn.LSTM(
    #     input_size=args.audio_feat_size,
    #     hidden_size=args.hidden_size,
    #     num_layers=1,
    #     batch_first=True
    # )

    # Feature size to hid size
    self.adapter_y = nn.Linear(args.audio_feat_size, args.hidden_size)
    self.adapter_z = nn.Linear(args.video_feat_size, args.hidden_size)

    # Encoder blocks
    self.enc_list = nn.ModuleList([Block(args, i) for i in range(args.layer)])

    # Flattening features before proj
    self.attflat_ac = AttFlat(args, 1, merge=True)
    self.attflat_vid = AttFlat(args, 1, merge=True)
    self.attflat_lang = AttFlat(args, 1, merge=True)

    # Classification layers
    self.proj_norm = LayerNorm(2 * args.hidden_size)
    if self.args.task == "sentiment":
        if self.args.task_binary:
            self.proj = nn.Linear(2 * args.hidden_size, 2)
        else:
            self.proj = nn.Linear(2 * args.hidden_size, 7)
    if self.args.task == "emotion":
        self.proj = nn.Linear(2 * args.hidden_size, 6)
def __init__(self, args, vocab_size, pretrained_emb, shift=False):
    super(Model_LA, self).__init__()
    self.args = args

    # LSTM
    self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=args.word_embed_size)
    # Loading the GloVe embedding weights
    self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.lstm_x = nn.LSTM(input_size=args.word_embed_size, hidden_size=args.hidden_size, num_layers=1, batch_first=True)
    self.lstm_y = nn.LSTM(input_size=args.audio_feat_size, hidden_size=args.hidden_size, num_layers=1, batch_first=True)

    # Feature size to hid size
    # self.adapter = nn.Linear(args.audio_feat_size, args.hidden_size)

    # Encoder blocks
    self.enc_list = nn.ModuleList([LA_Block(args, i, shift) for i in range(args.layer)])

    # Flattening features before proj
    self.attflat_img = AttFlat(args, 1, merge=True)
    self.attflat_lang = AttFlat(args, 1, merge=True)

    # Classification layers
    self.proj_norm = LayerNorm(2 * args.hidden_size)
    self.proj = nn.Linear(2 * args.hidden_size, args.ans_size)

    self.cos = nn.CosineSimilarity(dim=1, eps=1e-6)
def __init__(self, d_model=512, n_heads=8, d_ff=2048, dropout=0.1):
    super(TransformerEncoderLayer, self).__init__()

    self.norm1 = LayerNorm(d_model)
    self.self_attn = MultiHeadedAttention(n_heads, d_model)
    self.ffn = PositionwiseFeedForward(d_model, d_ff, dropout)
    self.norm2 = LayerNorm(d_model)
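# Hedged sketch: with two norms but no per-sublayer dropout modules in the
# constructor, a pre-norm residual wiring is one plausible forward; a
# post-norm variant (norm applied after the residual add) would also fit.
def forward(self, x, mask=None):
    h = self.norm1(x)
    x = x + self.self_attn(h, h, h, mask)
    x = x + self.ffn(self.norm2(x))
    return x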