import torch as t

# Gelu, PositionalEncoding, and ScaledPositionalEncoding are assumed to be
# defined elsewhere in this repo.


class Conv2dSubsampling(t.nn.Module):
    """Subsamples the input ~4x in time with two stride-2 convolutions,
    then projects to odim and adds positional encodings."""

    def __init__(self, idim, odim, dropout_rate):
        super(Conv2dSubsampling, self).__init__()
        self.conv = t.nn.Sequential(
            t.nn.Conv2d(1, odim, 3, 2),
            Gelu(),
            t.nn.Conv2d(odim, odim, 3, 2),
            Gelu())
        self.out = t.nn.Sequential(
            # each stride-2, kernel-3 conv maps a length L to (L - 1) // 2
            t.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim),
            PositionalEncoding(odim, dropout_rate))
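    # The original forward() is not shown; this is a minimal sketch assuming
    # the standard ESPnet-style Conv2dSubsampling flow and mask handling.
    def forward(self, x, x_mask=None):
        x = x.unsqueeze(1)                 # (batch, 1, time, idim)
        x = self.conv(x)                   # (batch, odim, time', freq')
        b, c, time, freq = x.size()
        x = self.out(x.transpose(1, 2).contiguous().view(b, time, c * freq))
        if x_mask is None:
            return x, None
        # subsample the mask the same way the convolutions subsample time
        return x, x_mask[:, :, :-2:2][:, :, :-2:2]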
class LowRankLinear(t.nn.Module):
    """Factorized linear layer: a rank-`rank` bottleneck in place of a full
    input_size x output_size weight matrix."""

    def __init__(self, input_size, output_size, rank):
        super(LowRankLinear, self).__init__()
        self.input_linear = t.nn.Linear(input_size, rank, bias=False)
        self.output_linear = t.nn.Linear(rank, output_size, bias=False)
        self.gelu = Gelu()
        t.nn.init.xavier_normal_(self.input_linear.weight)
        t.nn.init.xavier_normal_(self.output_linear.weight)
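    # Sketch only: the original forward() is missing. The Gelu member suggests
    # a non-linearity between the two factors, i.e. W2 @ gelu(W1 @ x).
    def forward(self, x):
        return self.output_linear(self.gelu(self.input_linear(x)))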
class LinearWithPosEmbedding2(t.nn.Module):
    def __init__(self, input_size, d_model, dropout_rate=0.0):
        super(LinearWithPosEmbedding2, self).__init__()
        self.linear = t.nn.Sequential(
            t.nn.Linear(input_size, d_model),
            t.nn.LayerNorm(d_model),
            t.nn.Dropout(dropout_rate),
            Gelu())
        self.pos_embedding = ScaledPositionalEncoding(d_model, dropout_rate)
        # self.linear is a Sequential, so initialize the Linear inside it;
        # the original `self.linear.weight` would raise an AttributeError.
        t.nn.init.xavier_normal_(self.linear[0].weight)
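    # Sketch, assuming the obvious composition of the two submodules:
    def forward(self, x):
        return self.pos_embedding(self.linear(x))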
class JoinNet(t.nn.Module):
    """Transducer joint network: fuses encoder and prediction (LM) features
    and projects to vocabulary logits."""

    def __init__(self, encoder_dim, lm_dim, model_size, vocab_size):
        super(JoinNet, self).__init__()
        self.linear = t.nn.Sequential(
            t.nn.Linear(in_features=encoder_dim + lm_dim, out_features=model_size, bias=True),
            Gelu(),
            t.nn.Linear(in_features=model_size, out_features=vocab_size, bias=True))
        t.nn.init.xavier_normal_(self.linear[0].weight)
        t.nn.init.xavier_normal_(self.linear[2].weight)
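    # Sketch only: the in_features of encoder_dim + lm_dim implies the two
    # inputs are concatenated on the feature axis. For transducer training
    # they are usually broadcast to a common (batch, T, U, feat) grid first;
    # that broadcasting is assumed to happen outside this module.
    def forward(self, encoder_out, lm_out):
        return self.linear(t.cat([encoder_out, lm_out], dim=-1))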
class FeedForwardBlock(t.nn.Module):
    def __init__(self, input_size, inner_size, dropout, use_low_rank=False):
        super(FeedForwardBlock, self).__init__()
        if not use_low_rank:
            self.linear1 = t.nn.Linear(input_size, inner_size, bias=True)
            t.nn.init.xavier_normal_(self.linear1.weight)
        else:
            self.linear1 = LowRankLinear(input_size, inner_size, rank=128)
        self.gelu = Gelu()
        if not use_low_rank:
            self.linear2 = t.nn.Linear(inner_size, input_size, bias=True)
            t.nn.init.xavier_normal_(self.linear2.weight)
        else:
            self.linear2 = LowRankLinear(inner_size, input_size, rank=128)
        self.layer_norm = t.nn.LayerNorm(input_size)
        # the original assigned self.dropout twice; one assignment suffices
        self.dropout = t.nn.Dropout(dropout, inplace=True)
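    # Sketch only: a pre-norm residual feed-forward is assumed here, but
    # post-norm is equally plausible from __init__ alone.
    def forward(self, x):
        residual = x
        x = self.layer_norm(x)
        x = self.dropout(self.gelu(self.linear1(x)))
        return residual + self.dropout(self.linear2(x))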
class FeedForwardReZeroBlock(t.nn.Module):
    def __init__(self, input_size, inner_size, dropout, use_low_rank=False):
        super(FeedForwardReZeroBlock, self).__init__()
        if not use_low_rank:
            self.linear1 = t.nn.Linear(input_size, inner_size, bias=True)
            t.nn.init.xavier_normal_(self.linear1.weight)
        else:
            self.linear1 = LowRankLinear(input_size, inner_size, rank=128)
        self.gelu = Gelu()
        if not use_low_rank:
            self.linear2 = t.nn.Linear(inner_size, input_size, bias=True)
            t.nn.init.xavier_normal_(self.linear2.weight)
        else:
            self.linear2 = LowRankLinear(inner_size, input_size, rank=128)
        # the original assigned self.dropout twice; one assignment suffices
        self.dropout = t.nn.Dropout(dropout, inplace=True)
        # ReZero: the residual branch is gated by a scalar initialized to zero
        self.rezero_alpha = t.nn.Parameter(t.Tensor([0]))
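    # Sketch only: ReZero (Bachlechner et al.) replaces normalization with a
    # learned scalar gate on the residual branch, so the assumed forward is
    # x + alpha * FFN(x) with alpha starting at zero.
    def forward(self, x):
        branch = self.dropout(self.gelu(self.linear1(x)))
        return x + self.rezero_alpha * self.dropout(self.linear2(branch))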