# Cross-attention sublayer, fixed LayerNorm variant (no pre-norm/scale-norm options).
def __init__(self, hidden_size=512, num_heads=8, dropout=0.1):
    super(Sublayer3, self).__init__()
    self.lnorm = LayerNorm(hidden_size)
    self.sublayer = MultiheadAttention(
        attention_size=hidden_size, num_heads=num_heads, dropout=dropout
    )
# Cross-attention sublayer with configurable pre-norm and ScaleNorm.
def __init__(
    self,
    hidden_size=512,
    num_heads=8,
    dropout=0.1,
    prenorm=True,
    scalenorm=True,
):
    super(Sublayer3, self).__init__()
    self.sublayer = MultiheadAttention(
        attention_size=hidden_size,
        num_heads=num_heads,
        dropout=dropout,
        nystrom=False,  # Nystrom attention is used only for self-attention
        kernel_size=None,  # convolutional residual is not used when a subsequent (causal) mask is applied
    )
    self.prenorm = prenorm
    self.lnorm = LayerNorm(hidden_size) if not scalenorm else ScaleNorm(hidden_size)
    if self.prenorm:
        # In the pre-norm path, the queries and the encoder keys/values
        # get separate normalization layers.
        self.lnormy = (
            LayerNorm(hidden_size) if not scalenorm else ScaleNorm(hidden_size)
        )
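A minimal sketch of how these flags could drive the forward pass, covering both the fixed-norm variant above and this configurable one. The `MultiheadAttention` call signature and the residual wiring are assumptions, not taken from the source.

def forward(self, x, y, attention_mask=None):
    # x: decoder states (queries); y: encoder states (keys/values).
    if self.prenorm:
        # Pre-norm: normalize each stream with its own norm, then add the residual.
        out = x + self.sublayer(
            self.lnorm(x), self.lnormy(y), attention_mask=attention_mask
        )
    else:
        # Post-norm: attend first, add the residual, then normalize once.
        out = self.lnorm(x + self.sublayer(x, y, attention_mask=attention_mask))
    return out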
# Position-wise feed-forward sublayer with configurable pre-norm and ScaleNorm.
def __init__(
    self,
    hidden_size=512,
    inner_size=2048,
    dropout=0.1,
    prenorm=True,
    scalenorm=True,
):
    super(Sublayer2, self).__init__()
    self.sublayer = PositionwiseFF(hidden_size, inner_size, dropout=dropout)
    self.prenorm = prenorm
    self.lnorm = LayerNorm(hidden_size) if not scalenorm else ScaleNorm(hidden_size)
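`ScaleNorm` is referenced above but not defined in this excerpt. A common implementation, following Nguyen & Salazar (2019, "Transformers without Tears"), replaces LayerNorm's per-feature statistics with a single learned scale over the l2-normalized activations. The sketch below matches the `ScaleNorm(hidden_size)` calls above, but its details are an assumption.

import torch
import torch.nn as nn

class ScaleNorm(nn.Module):
    # Scaled l2 normalization: y = g * x / max(||x||_2, eps), with a single
    # learnable scalar g initialized to sqrt(hidden_size).
    def __init__(self, hidden_size, eps=1e-5):
        super(ScaleNorm, self).__init__()
        self.scale = nn.Parameter(torch.tensor(float(hidden_size) ** 0.5))
        self.eps = eps

    def forward(self, x):
        return self.scale * x / x.norm(dim=-1, keepdim=True).clamp(min=self.eps)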
# Generic feed-forward block: linear projection with optional activation,
# layer normalization, and dropout.
def __init__(self, n_in, n_out, activation="relu", layer_norm=True, bias=True, dropout=0.1):
    super(FF, self).__init__()
    self.fc = nn.Linear(n_in, n_out, bias=bias)
    # Look up the activation class by name, falling back to ReLU for
    # unknown names; a None entry in the registry disables the activation.
    self.activation = NON_LINEARITIES.get(activation, nn.ReLU)
    if self.activation is not None:
        self.activation = self.activation()
    self.layer_norm = None
    if layer_norm:
        self.layer_norm = LayerNorm(n_out)
    self.drop = nn.Dropout(dropout)
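The `NON_LINEARITIES` lookup above implies a name-to-module mapping, and the forward pass is not shown here. Both sketches below are assumptions: the dict contents and the layer order (linear, activation, norm, dropout) are conventional choices, not confirmed by the source.

# Assumed shape of the activation registry; the None entry explains the
# `is not None` check in __init__.
NON_LINEARITIES = {"relu": nn.ReLU, "gelu": nn.GELU, "tanh": nn.Tanh, "none": None}

def forward(self, x):
    out = self.fc(x)
    if self.activation is not None:
        out = self.activation(out)
    if self.layer_norm is not None:
        out = self.layer_norm(out)
    return self.drop(out)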
# Self-attention sublayer; optionally uses Nystrom attention (with
# num_landmarks landmark points) and a convolutional residual (kernel_size).
def __init__(
    self,
    hidden_size=512,
    num_heads=8,
    dropout=0.1,
    nystrom=False,
    num_landmarks=32,
    kernel_size=None,
    prenorm=True,
    scalenorm=True,
):
    super(Sublayer1, self).__init__()
    self.sublayer = MultiheadAttention(
        attention_size=hidden_size,
        num_heads=num_heads,
        dropout=dropout,
        nystrom=nystrom,
        kernel_size=kernel_size,
        num_landmarks=num_landmarks,
    )
    self.prenorm = prenorm
    self.lnorm = LayerNorm(hidden_size) if not scalenorm else ScaleNorm(hidden_size)
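A minimal sketch of the pre-norm/post-norm residual pattern this constructor sets up for self-attention; the single-argument call to the attention sublayer and its mask keyword are assumptions.

def forward(self, x, attention_mask=None):
    if self.prenorm:
        # Pre-norm: normalize -> attend -> residual add.
        return x + self.sublayer(self.lnorm(x), attention_mask=attention_mask)
    # Post-norm: attend -> residual add -> normalize.
    return self.lnorm(x + self.sublayer(x, attention_mask=attention_mask))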
# Position-wise feed-forward sublayer, fixed LayerNorm variant.
def __init__(self, hidden_size=512, inner_size=2048, dropout=0.1):
    super(Sublayer2, self).__init__()
    self.lnorm = LayerNorm(hidden_size)
    self.sublayer = PositionwiseFF(hidden_size, inner_size, dropout=dropout)
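Putting the pieces together: a hypothetical encoder layer composed from `Sublayer1` and `Sublayer2` as constructed above. `EncoderLayer` and the sublayers' forward signatures are illustrative assumptions, not from the source.

class EncoderLayer(nn.Module):
    # Hypothetical composition: self-attention sublayer followed by the
    # position-wise feed-forward sublayer.
    def __init__(self, hidden_size=512, num_heads=8, inner_size=2048, dropout=0.1):
        super(EncoderLayer, self).__init__()
        self.attn = Sublayer1(hidden_size=hidden_size, num_heads=num_heads, dropout=dropout)
        self.ff = Sublayer2(hidden_size=hidden_size, inner_size=inner_size, dropout=dropout)

    def forward(self, x, attention_mask=None):
        # Assumes each sublayer handles its own normalization and residual.
        return self.ff(self.attn(x, attention_mask=attention_mask))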