def __init__(self, n_units, d_units=0, h=8, dropout=0.1,
             initialW=None, initial_bias=None):
    """Initialize DecoderLayer."""
    super(DecoderLayer, self).__init__()
    with self.init_scope():
        self.self_attn = MultiHeadAttention(n_units, h, dropout=dropout,
                                            initialW=initialW,
                                            initial_bias=initial_bias)
        self.src_attn = MultiHeadAttention(n_units, h, dropout=dropout,
                                           initialW=initialW,
                                           initial_bias=initial_bias)
        self.feed_forward = PositionwiseFeedForward(n_units, d_units=d_units,
                                                    dropout=dropout,
                                                    initialW=initialW,
                                                    initial_bias=initial_bias)
        self.norm1 = LayerNorm(n_units)
        self.norm2 = LayerNorm(n_units)
        self.norm3 = LayerNorm(n_units)
    self.dropout = dropout
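# Illustrative sketch (an assumption, not the class's actual forward method):
# the norm/attention/feed-forward links created above are typically wired as a
# pre-norm residual block. `self_attn`, `src_attn` and `feed_forward` are
# simplified to plain callables here; the real MultiHeadAttention calls also
# take attention masks and batch information.
import chainer.functions as F

def decoder_block_sketch(layer, e, source, dropout):
    """Pre-norm residual wiring of one Transformer decoder block (sketch)."""
    e = e + F.dropout(layer.self_attn(layer.norm1(e)), dropout)         # masked self-attention
    e = e + F.dropout(layer.src_attn(layer.norm2(e), source), dropout)  # encoder-decoder attention
    e = e + F.dropout(layer.feed_forward(layer.norm3(e)), dropout)      # position-wise feed-forward
    return e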
def __init__(self, idim, args, initialW=None, initial_bias=None):
    """Initialize Encoder.

    Args:
        idim (int): Input dimension.
        args (Namespace): Training config; must provide `transformer_input_layer`,
            `adim`, `aheads`, `eunits`, `elayers` and `dropout_rate`.
        initialW (Initializer, optional): Initializer for the weights.
        initial_bias (Initializer, optional): Initializer for the biases.

    """
    super(Encoder, self).__init__()
    initialW = chainer.initializers.Uniform if initialW is None else initialW
    initial_bias = chainer.initializers.Uniform if initial_bias is None else initial_bias
    with self.init_scope():
        channels = 64  # number of channels, following the paper
        if args.transformer_input_layer == 'conv2d':
            idim = int(np.ceil(np.ceil(idim / 2) / 2)) * channels
            self.input_layer = Conv2dSubsampling(channels, idim, args.adim,
                                                 dropout=args.dropout_rate,
                                                 initialW=initialW,
                                                 initial_bias=initial_bias)
        elif args.transformer_input_layer == 'linear':
            self.input_layer = LinearSampling(idim, args.adim,
                                              initialW=initialW,
                                              initial_bias=initial_bias)
        else:
            raise ValueError('Incorrect type of input layer')
        self.norm = LayerNorm(args.adim)
    for i in range(args.elayers):
        name = 'encoders.' + str(i)
        layer = EncoderLayer(args.adim, d_units=args.eunits, h=args.aheads,
                             dropout=args.dropout_rate,
                             initialW=initialW, initial_bias=initial_bias)
        self.add_link(name, layer)
    self.n_layers = args.elayers
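# Hypothetical usage sketch (not taken from a recipe; all values are
# illustrative): the constructor above only reads a handful of attributes from
# `args`, so a plain argparse.Namespace is enough to build an encoder, e.g.
# over 83-dimensional filterbank features.
import argparse

args = argparse.Namespace(
    transformer_input_layer='conv2d',  # or 'linear'
    adim=256,           # attention (model) dimension
    aheads=4,           # attention heads per EncoderLayer
    eunits=2048,        # position-wise feed-forward inner dimension
    elayers=12,         # number of EncoderLayer blocks
    dropout_rate=0.1,
)
encoder = Encoder(idim=83, args=args)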
def __init__(self, idim, attention_dim=256, attention_heads=4,
             linear_units=2048, num_blocks=6, dropout_rate=0.1,
             positional_dropout_rate=0.1, attention_dropout_rate=0.0,
             input_layer="conv2d", pos_enc_class=PositionalEncoding,
             initialW=None, initial_bias=None):
    """Initialize Encoder.

    Args:
        idim (int): Input dimension.
        attention_dim (int): Attention (model) dimension.
        attention_heads (int): Number of attention heads.
        linear_units (int): Number of units in the position-wise feed-forward layer.
        num_blocks (int): Number of encoder blocks.
        dropout_rate (float): Dropout rate of the input layer.
        positional_dropout_rate (float): Dropout rate of the positional encoding
            used by the 'embed' input layer.
        attention_dropout_rate (float): Dropout rate passed to the encoder layers.
        input_layer (str): Type of input layer ('conv2d', 'linear' or 'embed').
        pos_enc_class: Positional encoding class used by the 'embed' input layer.
        initialW (Initializer, optional): Initializer for the weights.
        initial_bias (Initializer, optional): Initializer for the biases.

    """
    super(Encoder, self).__init__()
    initialW = chainer.initializers.Uniform if initialW is None else initialW
    initial_bias = chainer.initializers.Uniform if initial_bias is None else initial_bias
    self.do_history_mask = False
    with self.init_scope():
        channels = 64  # number of channels, following the paper
        if input_layer == 'conv2d':
            idim = int(np.ceil(np.ceil(idim / 2) / 2)) * channels
            self.input_layer = Conv2dSubsampling(channels, idim, attention_dim,
                                                 dropout=dropout_rate,
                                                 initialW=initialW,
                                                 initial_bias=initial_bias)
        elif input_layer == 'linear':
            self.input_layer = LinearSampling(idim, attention_dim,
                                              initialW=initialW,
                                              initial_bias=initial_bias)
        elif input_layer == "embed":
            self.input_layer = chainer.Sequential(
                L.EmbedID(idim, attention_dim, ignore_label=-1),
                pos_enc_class(attention_dim, positional_dropout_rate))
            self.do_history_mask = True
        else:
            raise ValueError("unknown input_layer: " + input_layer)
        self.norm = LayerNorm(attention_dim)
    for i in range(num_blocks):
        name = 'encoders.' + str(i)
        layer = EncoderLayer(attention_dim, d_units=linear_units,
                             h=attention_heads,
                             dropout=attention_dropout_rate,
                             initialW=initialW, initial_bias=initial_bias)
        self.add_link(name, layer)
    self.n_layers = num_blocks
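# Hypothetical usage sketch (values are illustrative): the keyword-argument
# constructor above can be called without a training Namespace. For token-id
# input (e.g. a text encoder) one would pass input_layer='embed', which also
# enables the history mask.
encoder = Encoder(idim=83,
                  attention_dim=256,
                  attention_heads=4,
                  linear_units=2048,
                  num_blocks=6,
                  dropout_rate=0.1,
                  attention_dropout_rate=0.0,
                  input_layer='conv2d')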
def __init__(self, odim, args, initialW=None, initial_bias=None):
    """Initialize Decoder.

    Args:
        odim (int): Output (vocabulary) dimension.
        args (Namespace): Training config; must provide `adim`, `aheads`,
            `dunits`, `dlayers` and `dropout_rate`.
        initialW (Initializer, optional): Initializer for the weights.
        initial_bias (Initializer, optional): Initializer for the biases.

    """
    super(Decoder, self).__init__()
    initialW = chainer.initializers.Uniform if initialW is None else initialW
    initial_bias = chainer.initializers.Uniform if initial_bias is None else initial_bias
    with self.init_scope():
        self.output_norm = LayerNorm(args.adim)
        self.pe = PositionalEncoding(args.adim, args.dropout_rate)
        stvd = 1. / np.sqrt(args.adim)
        self.output_layer = L.Linear(args.adim, odim,
                                     initialW=initialW(scale=stvd),
                                     initial_bias=initial_bias(scale=stvd))
        self.embed = L.EmbedID(odim, args.adim, ignore_label=-1,
                               initialW=chainer.initializers.Normal(scale=1.0))
    for i in range(args.dlayers):
        name = 'decoders.' + str(i)
        layer = DecoderLayer(args.adim, d_units=args.dunits, h=args.aheads,
                             dropout=args.dropout_rate,
                             initialW=initialW, initial_bias=initial_bias)
        self.add_link(name, layer)
    self.n_layers = args.dlayers
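# Hypothetical usage sketch (values are illustrative): the decoder is built
# from the same training Namespace as the encoder; `odim` is the output
# vocabulary size (e.g. the number of sub-word units including special symbols).
import argparse

args = argparse.Namespace(
    adim=256,           # attention (model) dimension, shared with the encoder
    aheads=4,           # attention heads per DecoderLayer
    dunits=2048,        # position-wise feed-forward inner dimension
    dlayers=6,          # number of DecoderLayer blocks
    dropout_rate=0.1,
)
decoder = Decoder(odim=5000, args=args)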