def __init__(self, size, self_attn, src_attn, feed_forward, dropout_rate,
             death_rate=0.0, normalize_before=True, concat_after=False):
    """Construct a StochasticDecoderLayer object."""
    super(StochasticDecoderLayer, self).__init__()
    self.size = size
    self.self_attn = self_attn
    self.src_attn = src_attn
    self.feed_forward = feed_forward
    self.norm1 = LayerNorm(size)
    self.norm2 = LayerNorm(size)
    self.norm3 = LayerNorm(size)
    self.dropout = nn.Dropout(dropout_rate)
    # Probability of dropping this whole layer during training (stochastic depth).
    self.death_rate = death_rate
    # Pre-norm (True) vs. post-norm (False) placement of the LayerNorms.
    self.normalize_before = normalize_before
    self.concat_after = concat_after
    if self.concat_after:
        # Concatenate attention input and output before projecting back to `size`.
        self.concat_linear1 = nn.Linear(size + size, size)
        self.concat_linear2 = nn.Linear(size + size, size)
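# A minimal forward() sketch (an assumption, not the original implementation) of how
# death_rate is typically consumed: during training the layer is skipped with
# probability death_rate (stochastic depth), otherwise the residual branch is
# rescaled by 1 / (1 - death_rate). The tgt/tgt_mask/memory/memory_mask arguments
# are assumed names.
def forward(self, tgt, tgt_mask, memory, memory_mask):
    skip_layer = False
    stoch_layer_coeff = 1.0
    if self.training and self.death_rate > 0:
        skip_layer = torch.rand(1).item() < self.death_rate
        stoch_layer_coeff = 1.0 / (1.0 - self.death_rate)
    if skip_layer:
        # Dropped layer: pass the inputs through untouched.
        return tgt, tgt_mask, memory, memory_mask
    residual = tgt
    x = self.norm1(tgt) if self.normalize_before else tgt
    x = residual + stoch_layer_coeff * self.dropout(self.self_attn(x, x, x, tgt_mask))
    # The source-attention (norm2/src_attn) and feed-forward (norm3) sublayers follow
    # the same residual pattern and are omitted to keep the sketch short.
    return x, tgt_mask, memory, memory_mask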
def __init__(self, d_model, d_ff, dropout_rate):
    super(TransEncoder, self).__init__()
    # Self-attention sublayer.
    self.multi_head_attn = MultiHeadAttn(d_model, num_heads=8)
    self.dropout1 = nn.Dropout(dropout_rate)
    self.norm1 = LayerNorm(d_model)
    # Position-wise feed-forward sublayer.
    self.feed_forward = FeedForward(d_model, d_ff)
    self.dropout2 = nn.Dropout(dropout_rate)
    self.norm2 = LayerNorm(d_model)
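# A minimal forward() sketch, assuming the post-norm ("add & norm") layout suggested
# by the dropout1/norm1 and dropout2/norm2 pairs above; the mask argument and the
# MultiHeadAttn call signature are assumptions, not taken from the source.
def forward(self, x, mask=None):
    # Self-attention sublayer with residual connection.
    x = self.norm1(x + self.dropout1(self.multi_head_attn(x, x, x, mask)))
    # Position-wise feed-forward sublayer with residual connection.
    x = self.norm2(x + self.dropout2(self.feed_forward(x)))
    return x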
def __init__(self, input_module, filter_widths, filter_outputs, dropout=0.0,
             layer_norm=False, activation="relu"):
    super(CNNEncoder, self).__init__()
    self.input_module_ = input_module
    # One Conv2d per filter width; each filter spans the full embedding dimension.
    self.filters_ = nn.ModuleList([
        nn.Conv2d(1, filter_outputs, (fw, input_module.embedding_size))
        for fw in filter_widths
    ])
    if activation == "relu":
        self.activation_ = F.relu
        self.reverse_activation_and_pool_ = True
    else:
        raise ValueError(
            "activation {} is not implemented.".format(activation))
    self.dropout_ = dropout
    if layer_norm:
        self.layer_norms_ = nn.ModuleList(
            [LayerNorm(filter_outputs) for _ in filter_widths])
    else:
        self.layer_norms_ = []
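# A hypothetical encode() sketch of the convolution -> activation -> max-over-time
# pooling path implied by the filters above (Kim-style sentence CNN); the method
# name, the input layout, and the call signatures are assumptions.
def encode(self, inputs):
    # (batch, seq_len) token ids -> (batch, 1, seq_len, embedding_size)
    embeddings = self.input_module_(inputs).unsqueeze(1)
    features = []
    for i, conv in enumerate(self.filters_):
        h = self.activation_(conv(embeddings)).squeeze(3)  # (batch, filter_outputs, seq_len - fw + 1)
        h = F.max_pool1d(h, h.size(2)).squeeze(2)          # (batch, filter_outputs)
        if self.layer_norms_:
            h = self.layer_norms_[i](h)
        features.append(h)
    out = torch.cat(features, dim=1)
    return F.dropout(out, p=self.dropout_, training=self.training)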
def __init__(self, layer, N):
    """
    :param layer: decoder layer to be cloned
    :param N: number of stacked decoder layers
    """
    super(Decoder, self).__init__()
    self.layers = clones(layer, N)
    self.norm = LayerNorm(layer.size)
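# A minimal forward() sketch of the usual "stack of N layers + final LayerNorm"
# pattern this constructor sets up (Annotated Transformer style); the memory and
# mask arguments are assumptions.
def forward(self, x, memory, src_mask, tgt_mask):
    for layer in self.layers:
        x = layer(x, memory, src_mask, tgt_mask)
    return self.norm(x)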
def __init__(self, size, dropout):
    """
    :param size: feature dimension normalized by LayerNorm
    :param dropout: dropout rate applied to the sublayer output
    """
    super(SublayerConnection, self).__init__()
    self.norm = LayerNorm(size)
    self.dropout = nn.Dropout(dropout)
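# A minimal forward() sketch of the residual wrapper this constructor sets up:
# normalize, run the sublayer (a callable such as attention or feed-forward),
# apply dropout, and add the residual. The pre-norm ordering is an assumption.
def forward(self, x, sublayer):
    return x + self.dropout(sublayer(self.norm(x)))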
def __init__(self, encoder: nn.Module, generator, embedding, n_layers: int):
    """
    :param encoder: encoder/transformer layer that takes advantage of self-attention
    :param generator: output/prediction head applied to the encoded representation
    :param embedding: input embedding module
    :param n_layers: int, number of encoder/transformer layers
    """
    super(Bert, self).__init__()
    self.encoder = encoder
    self.layers = clone(encoder, n_layers)
    self.embed = embedding
    self.layer_norm = LayerNorm(encoder.size)
    self.generator = generator
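# A minimal forward() sketch, assuming the conventional flow for this constructor:
# embed the tokens, run them through the cloned encoder layers, apply the final
# LayerNorm, and hand the result to the generator head. The mask argument and the
# per-layer call signature are assumptions, not taken from the source.
def forward(self, tokens, mask=None):
    x = self.embed(tokens)
    for layer in self.layers:
        x = layer(x, mask)
    return self.generator(self.layer_norm(x))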
def __init__(self, layer, N):
    super(Decoder, self).__init__()
    self.layers = clones(layer, N)
    self.norm = LayerNorm(layer.size)
def __init__(self, layer, N):
    super(Encoder, self).__init__()
    self.layers = clones(layer, N)
    # TODO: Use BatchNorm instead?
    self.norm = LayerNorm(layer.size)
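# A minimal forward() sketch matching the clones + final LayerNorm pattern above;
# the mask argument is an assumption.
def forward(self, x, mask):
    for layer in self.layers:
        x = layer(x, mask)
    return self.norm(x)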
def __init__(self, inp_dim, out_dim, bidirectional=True, dropout_p=[0.2],
             use_batchnorm=[True], use_layernorm=[False],
             use_inp_layernorm=False, use_inp_batchnorm=True,
             orth_init=[True], ligru_act=nn.ReLU, to_do='train',
             use_cuda=True):
    super(liGRU, self).__init__()

    # Reading parameters
    self.input_dim = inp_dim
    self.out_dim = out_dim
    self.dropout_p = dropout_p
    self.use_batchnorm = use_batchnorm
    self.use_layernorm = use_layernorm
    self.use_inp_layernorm = use_inp_layernorm
    self.use_inp_batchnorm = use_inp_batchnorm
    self.orth_init = orth_init
    self.ligru_act = ligru_act
    self.bidirectional = bidirectional
    self.use_cuda = use_cuda
    self.to_do = to_do

    if self.to_do == 'train':
        self.test_flag = False
    else:
        self.test_flag = True

    # List initialization
    self.wh = nn.ModuleList([])
    self.uh = nn.ModuleList([])

    self.wz = nn.ModuleList([])  # Update Gate
    self.uz = nn.ModuleList([])  # Update Gate

    self.ln = nn.ModuleList([])     # Layer Norm
    self.bn_wh = nn.ModuleList([])  # Batch Norm
    self.bn_wz = nn.ModuleList([])  # Batch Norm

    self.act = nn.ModuleList([])  # Activations

    # Input layer normalization
    if self.use_inp_layernorm:
        self.ln0 = LayerNorm(self.input_dim)

    # Input batch normalization
    if self.use_inp_batchnorm:
        self.bn0 = nn.BatchNorm1d(self.input_dim, momentum=0.05)

    self.N_ligru_lay = len(self.out_dim)
    current_input = self.input_dim

    # Initialization of hidden layers
    for i in range(self.N_ligru_lay):

        # Activations
        self.act += [self.ligru_act()]

        add_bias = True
        if self.use_layernorm[i] or self.use_batchnorm[i]:
            add_bias = False

        # Feed-forward connections
        self.wh.append(
            nn.Linear(current_input, self.out_dim[i], bias=add_bias))
        self.wz.append(
            nn.Linear(current_input, self.out_dim[i], bias=add_bias))

        # Recurrent connections
        self.uh.append(
            nn.Linear(self.out_dim[i], self.out_dim[i], bias=False))
        self.uz.append(
            nn.Linear(self.out_dim[i], self.out_dim[i], bias=False))

        if self.orth_init:
            nn.init.orthogonal_(self.uh[i].weight)
            nn.init.orthogonal_(self.uz[i].weight)

        # Batch norm initialization
        self.bn_wh.append(nn.BatchNorm1d(self.out_dim[i], momentum=0.05))
        self.bn_wz.append(nn.BatchNorm1d(self.out_dim[i], momentum=0.05))

        self.ln.append(LayerNorm(self.out_dim[i]))

        if self.bidirectional:
            current_input = 2 * self.out_dim[i]
        else:
            current_input = self.out_dim[i]

    self.final_dim = self.out_dim[i] + self.bidirectional * self.out_dim[i]
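# A minimal construction example for the class above; the values are illustrative
# assumptions (e.g. 40-dim acoustic features, two 550-unit layers), not taken from
# the source. Note that dropout_p, use_batchnorm, and use_layernorm are per-layer
# lists, one entry per element of out_dim.
ligru = liGRU(inp_dim=40,
              out_dim=[550, 550],
              bidirectional=True,
              dropout_p=[0.2, 0.2],
              use_batchnorm=[True, True],
              use_layernorm=[False, False],
              use_inp_layernorm=False,
              use_inp_batchnorm=True,
              orth_init=[True, True],
              ligru_act=nn.ReLU,
              to_do='train',
              use_cuda=torch.cuda.is_available())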
def __init__(self, size, dropout):
    super(SublayerConnection, self).__init__()
    # TODO: Maybe use BatchNorm?
    self.norm = LayerNorm(size)
    self.dropout = nn.Dropout(dropout)