def __init__(self, conv_layers, hidden_dim, feed_forward_dim=2048):
    super(Encoder, self).__init__()
    # Your code here
    self.conv = ConvEncoder(input_dim=hidden_dim, num_layers=conv_layers)
    self.feed_forward = PositionFeedforward(hidden_dim, feed_forward_dim)
    self.attention = MultiHeadAttention(hid_dim=hidden_dim, n_heads=16)
def __init__(self, d_model=512, d_feature=64, d_ff=2048, dropout=0.1):
    super().__init__()
    self.n_heads = d_model // d_feature
    self.norm1 = LayerNorm(d_model)
    self.norm2 = LayerNorm(d_model)
    self.norm3 = LayerNorm(d_model)
    self.masked_attn = MultiHeadAttention(d_model, d_feature, dropout)
    self.attn = MultiHeadAttention(d_model, d_feature, dropout)
    self.position_wise_ff = nn.Sequential(
        nn.Linear(d_model, d_ff),
        nn.ReLU(),
        nn.Linear(d_ff, d_model),
    )
    self.dropout = nn.Dropout(dropout)
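# A minimal, self-contained sketch of how a decoder layer with the modules above is commonly
# wired: masked self-attention, encoder-decoder attention, and a position-wise feed-forward
# block, each followed by dropout, a residual connection, and layer norm. It uses
# torch.nn.MultiheadAttention as a stand-in for the custom MultiHeadAttention(d_model,
# d_feature, dropout) above, so the call signature and the exact norm placement in the
# original project may differ.
import torch
import torch.nn as nn

class DecoderLayerSketch(nn.Module):
    def __init__(self, d_model=512, d_feature=64, d_ff=2048, dropout=0.1):
        super().__init__()
        n_heads = d_model // d_feature
        self.masked_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.position_wise_ff = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Linear(d_ff, d_model),
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, tgt, memory, tgt_mask=None):
        # Masked self-attention over the target sequence.
        x, _ = self.masked_attn(tgt, tgt, tgt, attn_mask=tgt_mask)
        tgt = self.norm1(tgt + self.dropout(x))
        # Attention over the encoder output (memory).
        x, _ = self.attn(tgt, memory, memory)
        tgt = self.norm2(tgt + self.dropout(x))
        # Position-wise feed-forward.
        x = self.position_wise_ff(tgt)
        return self.norm3(tgt + self.dropout(x))

if __name__ == "__main__":
    layer = DecoderLayerSketch()
    out = layer(torch.randn(2, 10, 512), torch.randn(2, 12, 512))
    print(out.shape)  # torch.Size([2, 10, 512])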
def __init__(self, d_model, num_heads, dff, rate=0.1):
    super(DecoderLayer, self).__init__()
    self.mha1 = MultiHeadAttention(d_model, num_heads)
    self.mha2 = MultiHeadAttention(d_model, num_heads)
    self.ffn = point_wise_feed_forward_network(d_model, dff)
    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)
    self.dropout3 = tf.keras.layers.Dropout(rate)
def __init__(self, num_head=8, num_dim_k=64, num_dim_v=64, d_rate_attn=0.1,
             act_func1="LeakyReLU", dim2=100, act_func2="LeakyReLU"):
    """
    num_head:    for Attn, the number of heads in MultiHeadAttention
    num_dim_k:   for Attn, the dimension that query and key are mapped to
    num_dim_v:   for Attn, the dimension that value is mapped to
    d_rate_attn: dropout rate for MultiHeadAttention
    """
    super(MultiHeadAttnMlpModel, self).__init__()
    num_dim = 500
    num_seq = 100
    self.attn = MultiHeadAttention(num_head, num_dim, num_dim_k, num_dim_v, d_rate_attn)
    self.bn = nn.BatchNorm1d(num_dim)
    self.mlp = nn.Sequential()
    self.mlp.add_module('fc1', nn.Linear(num_seq * num_dim, num_dim))
    self.mlp.add_module('bn1', nn.BatchNorm1d(num_dim))
    self.mlp.add_module('act_fun1', nnActi.get_acti(act_func1))
    self.mlp.add_module('fc2', nn.Linear(num_dim, dim2))
    self.mlp.add_module('bn2', nn.BatchNorm1d(dim2))
    self.mlp.add_module('act_fun2', nnActi.get_acti(act_func2))
    self.mlp.add_module('fc3', nn.Linear(dim2, 1))
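# Shape-flow sketch for the MLP head above: 'fc1' expects num_seq * num_dim inputs, which implies
# the attention output of shape (batch, num_seq=100, num_dim=500) is flattened before the MLP.
# The sketch uses torch.nn.MultiheadAttention as a stand-in for the custom MultiHeadAttention;
# the standard module requires embed_dim % num_heads == 0, so 10 heads are used here instead of
# the default 8, whereas the custom module maps to separate d_k/d_v dimensions and has no such
# constraint.
import torch
import torch.nn as nn

attn = nn.MultiheadAttention(embed_dim=500, num_heads=10, batch_first=True)
x = torch.randn(4, 100, 500)        # (batch, num_seq, num_dim)
out, _ = attn(x, x, x)              # (batch, num_seq, num_dim)
flat = out.flatten(start_dim=1)     # (batch, num_seq * num_dim) -> input size of 'fc1'
print(flat.shape)                   # torch.Size([4, 50000])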
class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size, end_token, max_length, type,
                 attention_type):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.end_token = end_token
        self.type = type
        self.attention_type = attention_type
        # Initialize dropout rates
        self.dropout_rate_0 = 0.5
        self.dropout_rate = 0.5
        self.embedding = nn.Embedding(output_size, hidden_size)
        if self.type == 'gru':
            self.network = nn.GRU(hidden_size, hidden_size, batch_first=True)
        else:
            self.network = nn.RNN(hidden_size, hidden_size, batch_first=True)
        # Initialize the right attention mechanism
        if attention_type.lower() == "bilinear":
            self.attention = BiLinearAttention(hidden_size)
        elif attention_type.lower() == "multihead":
            self.attention = MultiHeadAttention(hidden_size)
        else:
            self.attention = ScaledDotAttention(hidden_size)
        self.attention_combined = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.logsoftmax = nn.LogSoftmax(dim=1)
def __init__(self, d_o, d_inner, n_head, d_k, d_v, dropout=0.1):
    super(DecoderLayer, self).__init__()
    self.enc_attn = MultiHeadAttention(n_head, d_o, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_o, d_inner, dropout=dropout)
def __init__(self, w, k, n, q, h, z):
    """
    w = window length (number of periods)
    k = dimension of time (note: Time2Vec will make the time dimension k + 1)
    n = number of features at each point in time (without time features)
    q = number of queries (columns in W)
    h = number of heads
    z = number of features to be extracted from the q * h results created by the attention heads
    """
    super().__init__()
    self.w, self.k, self.n, self.q, self.h, self.z = w, k, n, q, h, z
    self.time2vec = Time2Vec(k)
    self.mha = MultiHeadAttention(n + k + 1, q, h, z)
    self.W = tf.Variable(tf.random.uniform([1, 1, w]))
    self.C = tf.Variable(tf.initializers.orthogonal()([1, z, 1]))
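# The docstring above notes that Time2Vec turns the scalar time index into k + 1 features.
# Below is a minimal Time2Vec-style layer in the spirit of Kazemi et al. (2019): one linear
# time feature plus k periodic ones. It is a sketch for illustration only, not necessarily
# the project's Time2Vec implementation.
import tensorflow as tf

class Time2VecSketch(tf.keras.layers.Layer):
    def __init__(self, k):
        super().__init__()
        self.k = k

    def build(self, input_shape):
        # One frequency and one phase per output feature.
        self.freq = self.add_weight(name="freq", shape=(1, self.k + 1), initializer="random_uniform")
        self.phase = self.add_weight(name="phase", shape=(1, self.k + 1), initializer="random_uniform")

    def call(self, t):
        # t: (batch, w, 1) scalar time index per step -> (batch, w, k + 1)
        v = t * self.freq + self.phase
        # The first feature stays linear, the remaining k are periodic.
        return tf.concat([v[..., :1], tf.sin(v[..., 1:])], axis=-1)

# Example: a window of w = 32 steps with a scalar time index becomes (1, 32, k + 1) features
# that can be concatenated with the n raw features before the attention block.
# t = tf.reshape(tf.range(32, dtype=tf.float32), [1, 32, 1]); print(Time2VecSketch(k=4)(t).shape)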
def __init__(self, num_head=8, num_dim_k=64, num_dim_v=64, d_rate_attn=0.1,
             dim2=100, act_func2="LeakyReLU"):
    super(MultiHeadAttnLSTMModel, self).__init__()
    num_dim = 500
    self.attn = MultiHeadAttention(num_head, num_dim, num_dim_k, num_dim_v, d_rate_attn)
    self.rnn = nn.LSTM(input_size=500, hidden_size=500, num_layers=2)
    self.mlp = nn.Sequential()
    self.mlp.add_module('fc1', nn.Linear(500, dim2))
    self.mlp.add_module('bn2', nn.BatchNorm1d(dim2))
    self.mlp.add_module('act_fun2', nnActi.get_acti(act_func2))
    self.mlp.add_module('fc3', nn.Linear(dim2, 1))
class Encoder(nn.Module):  # 1 Mark
    def __init__(self, conv_layers, hidden_dim, feed_forward_dim=2048):
        super(Encoder, self).__init__()
        # Your code here
        self.conv = ConvEncoder(input_dim=hidden_dim, num_layers=conv_layers)
        self.attention = MultiHeadAttention(hid_dim=hidden_dim, n_heads=16)
        self.feed_forward = PositionFeedforward(
            hid_dim=hidden_dim, feedForward_dim=feed_forward_dim)

    def forward(self, input):
        """
        Forward pass of the Encoder class.
        :param input: input tensor for the forward pass.
        """
        # Your code here
        out = self.conv(input)
        out = self.attention(out, out, out)
        out = self.feed_forward(out)
        return out
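# A hypothetical smoke test for the Encoder above. It assumes ConvEncoder, MultiHeadAttention
# and PositionFeedforward all preserve the (batch, seq_len, hidden_dim) shape, and that the
# project module is importable as in the training script further below; the batch size and
# sequence length here are arbitrary.
import torch
from Encoder import Encoder

encoder = Encoder(conv_layers=5, hidden_dim=512)
dummy = torch.randn(2, 40, 512)   # (batch, seq_len, hidden_dim), arbitrary sizes
out = encoder(dummy)
print(out.shape)                  # expected: torch.Size([2, 40, 512]) if shapes are preserved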
def __init__(self, num_head=8, num_dim_k=64, num_dim_v=64, d_rate_attn=0.1,
             dim1=20, act_func1="LeakyReLU", kernel_size1=3, stride1=2,
             act_func2="LeakyReLU", kernel_size2=3, stride2=2):
    """
    Problematic: same problem as described above.
    """
    super(MultiHeadAttnConvModel2, self).__init__()
    num_dim = 500
    seq_len = 100  # was commented out in the original, but is needed by get_dim_out below
    self.attn = MultiHeadAttention(num_head, num_dim, num_dim_k, num_dim_v, d_rate_attn)
    self.dim_conv_out1 = get_dim_out(seq_len, kernel_size1, stride1)
    self.dim_conv_out2 = get_dim_out(self.dim_conv_out1, kernel_size2, stride2)
    self.layers = nn.Sequential()
    self.layers.add_module("conv1", nn.Conv1d(num_dim, dim1, kernel_size1, stride1))
    self.layers.add_module("bn1", nn.BatchNorm1d(dim1))
    self.layers.add_module("act_func1", nnActi.get_acti(act_func1))
    if self.dim_conv_out2 < 1:
        # Fall back to a small kernel/stride if the configured second conv would shrink the
        # sequence below length 1.
        self.layers.add_module("conv2", nn.Conv1d(dim1, 1, 2, 1))
        self.dim_conv_out = get_dim_out(self.dim_conv_out1, 2, 1)
    else:
        self.layers.add_module("conv2", nn.Conv1d(dim1, 1, kernel_size2, stride2))
        self.dim_conv_out = self.dim_conv_out2
    self.layers.add_module('bn2', nn.BatchNorm1d(1))
    self.layers.add_module('act_func2', nnActi.get_acti(act_func2))
    # self.layers.add_module("maxpool", nn.MaxPool1d(124))
    self.li = nn.Linear(self.dim_conv_out, 1, bias=True)
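# get_dim_out is not shown in this section; it presumably computes the Conv1d output length that
# the branching above guards against. A sketch consistent with PyTorch's Conv1d defaults
# (padding=0, dilation=1):
def get_dim_out_sketch(length_in, kernel_size, stride):
    # L_out = floor((L_in - kernel_size) / stride) + 1
    return (length_in - kernel_size) // stride + 1

# With seq_len=100, kernel_size1=3, stride1=2 the first conv yields length 49; a second 3/2 conv
# yields 24. The `dim_conv_out2 < 1` branch only triggers for much shorter sequences or larger
# kernels/strides, in which case a small 2/1 conv is used instead.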
def __init__(self, conv_layers, hidden_dim, feed_forward_dim=2048):
    super(Encoder, self).__init__()
    # The classes used are ConvEncoder, MultiHeadAttention and PositionFeedforward.
    self.conv = ConvEncoder(input_dim=hidden_dim, num_layers=conv_layers)
    self.feed_forward = PositionFeedforward(hidden_dim, feed_forward_dim)
    self.attention = MultiHeadAttention(hid_dim=hidden_dim, n_heads=16)
# Your code goes here.
from torch.utils.data import DataLoader  # needed for DataLoader below; utils may also provide it

from trainer import trainer
from utils import *
from datasets import dataset
from Encoder import Encoder
from LiarLiar import arePantsonFire
from Attention import MultiHeadAttention, PositionFeedforward

liar_dataset_train = dataset(prep_Data_from='train')
liar_dataset_val = dataset(prep_Data_from='val')
sent_len, just_len = liar_dataset_train.get_max_lenghts()

dataloader_train = DataLoader(dataset=liar_dataset_train, batch_size=50)
dataloader_val = DataLoader(dataset=liar_dataset_val, batch_size=25)

statement_encoder = Encoder(hidden_dim=512, conv_layers=5)
justification_encoder = Encoder(hidden_dim=512, conv_layers=5)
multiheadAttention = MultiHeadAttention(hid_dim=512, n_heads=32)
positionFeedForward = PositionFeedforward(hid_dim=512, feedForward_dim=2048)

model = arePantsonFire(statement_encoder, justification_encoder, multiheadAttention,
                       positionFeedForward, 512, sent_len, just_len,
                       liar_dataset_train.embedding_dim, 'cpu')

trainer(model, dataloader_train, dataloader_val, num_epochs=1, train_batch=1,
        test_batch=1, device='cpu')

# Do not change module_list, otherwise no marks will be awarded
module_list = [
    liar_dataset_train,
    liar_dataset_val,
    dataloader_train,
    dataloader_val,
def __init__(self, hidden_size, input_size):
    super(TransformerSublayer, self).__init__()
    self.attention = MultiHeadAttention(hidden_size)
    self.feedforward1 = nn.Linear(hidden_size, hidden_size * 4)
    self.feedforward2 = nn.Linear(hidden_size * 4, hidden_size)
# Pass the appropriate value in the prep_Data_from argument to prepare the data. Sentence and
# justification length are both defined as liar_dataset_train.get_max_length(). Instantiate
# dataloader_train and dataloader_val on the train and val datasets.
liar_dataset_train = dataset()
liar_dataset_val = dataset(prep_Data_from='val', purpose='test_class')
batch_size = 25
dataloader_train = DataLoader(liar_dataset_train, batch_size)
dataloader_val = DataLoader(liar_dataset_val, batch_size)
max_length_sentence = liar_dataset_train.get_max_length()

# statement_encoder and justification_encoder are instances of the Encoder class
statement_encoder = Encoder(5, 512)
justification_encoder = Encoder(5, 512)

# multiHeadAttention and positionFeedForward are instances of the respective classes
multiHeadAttention = MultiHeadAttention(512, 32)
positionFeedForward = PositionFeedforward(512, 2048)

# model is an instance of the arePantsonFire class
model = arePantsonFire(statement_encoder, justification_encoder, multiHeadAttention,
                       positionFeedForward, 512, max_length_sentence)

# Call to the trainer function
from trainer import trainer
path_to_save = None  # Define it