Example #1
 def __init__(
         self,
         size,
         self_attn,
         src_attn,
         feed_forward,
         dropout_rate,
         normalize_before=True,
         concat_after=False,
 ):
     """Construct an DecoderLayer object."""
     super(DecoderLayer, self).__init__()
     self.size = size
     self.self_attn = self_attn
     self.src_attn = src_attn
     self.feed_forward = feed_forward
     self.norm1 = LayerNorm(size)
     self.norm2 = LayerNorm(size)
     self.norm3 = LayerNorm(size)
     self.dropout = nn.Dropout(dropout_rate)
     self.normalize_before = normalize_before
     self.concat_after = concat_after
     if self.concat_after:
         self.concat_linear1 = nn.Linear(size + size, size)
         self.concat_linear2 = nn.Linear(size + size, size)
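For context, the two concat_linear projections above are only used when concat_after=True: the self-attention (or source-attention) output is concatenated with its input along the feature axis and projected back to size before the residual add. The snippet below is a minimal illustrative sketch of that pattern; the names x, residual, and tgt_mask are assumptions, not the class's actual forward code.

import torch

def concat_after_self_attention(layer, x, residual, tgt_mask):
    # Concatenate the attention input and output on the feature axis,
    # then project back to `size` before the residual connection.
    x_concat = torch.cat((x, layer.self_attn(x, x, x, tgt_mask)), dim=-1)
    return residual + layer.concat_linear1(x_concat)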
Example #2
    def __init__(self, state_dim, action_dim,
                 hidden_dim, output_dim, use_layer_norm=False):
        super(Critic, self).__init__()

        self.state_dim = state_dim
        self.action_dim = action_dim
        self.hidden = hidden_dim
        self.output_dim = output_dim
        self.use_layer_norm = use_layer_norm

        # Architecture
        self.input = nn.Linear(in_features=self.state_dim+self.action_dim, out_features=self.hidden)
        self.hidden_1 = nn.Linear(in_features=self.hidden, out_features=self.hidden*2)
        self.hidden_2 = nn.Linear(in_features=self.hidden*2, out_features=self.hidden*2)
        self.output = nn.Linear(in_features=self.hidden*2, out_features=self.output_dim)

        if self.use_layer_norm:
            self.ln1 = LayerNorm(self.hidden)
            self.ln2 = LayerNorm(self.hidden*2)
            self.ln3 = LayerNorm(self.hidden*2)

        # Leaky ReLU activation
        self.lrelu = nn.LeakyReLU()

        # Initialize the weights with xavier initialization
        nn.init.xavier_uniform_(self.input.weight)
        nn.init.xavier_uniform_(self.hidden_1.weight)
        nn.init.xavier_uniform_(self.hidden_2.weight)
        nn.init.xavier_uniform_(self.output.weight)
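The example shows only the constructor; a typical forward pass for such a critic concatenates state and action and feeds them through the stack defined above. The sketch below assumes that layout (including applying the optional LayerNorm before the activation), so treat it as an illustration rather than the original code.

import torch

def critic_forward(critic, state, action):
    # Illustrative Q(s, a) pass over the layers defined in the constructor.
    x = torch.cat([state, action], dim=-1)   # (batch, state_dim + action_dim)
    x = critic.input(x)
    if critic.use_layer_norm:
        x = critic.ln1(x)
    x = critic.lrelu(x)
    x = critic.hidden_1(x)
    if critic.use_layer_norm:
        x = critic.ln2(x)
    x = critic.lrelu(x)
    x = critic.hidden_2(x)
    if critic.use_layer_norm:
        x = critic.ln3(x)
    x = critic.lrelu(x)
    return critic.output(x)                  # (batch, output_dim)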
Example #3
 def __init__(
     self,
     size,
     self_attn,
     feed_forward,
     feed_forward_macaron,
     conv_module,
     dropout_rate,
     normalize_before=True,
     concat_after=False,
 ):
     """Construct an EncoderLayer object."""
     super(EncoderLayer, self).__init__()
     self.self_attn = self_attn
     self.feed_forward = feed_forward
     self.feed_forward_macaron = feed_forward_macaron
     self.conv_module = conv_module
     self.norm_ff = LayerNorm(size)  # for the FNN module
     self.norm_mha = LayerNorm(size)  # for the MHA module
     if feed_forward_macaron is not None:
         self.norm_ff_macaron = LayerNorm(size)
         self.ff_scale = 0.5
     else:
         self.ff_scale = 1.0
     if self.conv_module is not None:
         self.norm_conv = LayerNorm(size)  # for the CNN module
          # for the final output of the block
          self.norm_final = LayerNorm(size)
     self.dropout = nn.Dropout(dropout_rate)
     self.size = size
     self.normalize_before = normalize_before
     self.concat_after = concat_after
     if self.concat_after:
         self.concat_linear = nn.Linear(size + size, size)
Example #4
 def __init__(
     self,
     size,
     self_attn,
     feed_forward,
     dropout_rate,
     normalize_before=True,
     concat_after=False,
 ):
      """Construct an EncoderLayer object."""
     super(EncoderLayer, self).__init__()
     self.self_attn = self_attn
     self.feed_forward = feed_forward
     self.norm1 = LayerNorm(size)
     self.norm2 = LayerNorm(size)
     self.dropout = nn.Dropout(dropout_rate)
     self.size = size
     self.normalize_before = normalize_before
     self.concat_after = concat_after
     if self.concat_after:
         self.concat_linear = nn.Linear(size + size, size)
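Both EncoderLayer variants above (and the DecoderLayer in Example #1) share the normalize_before flag, which selects between pre-norm and post-norm residual blocks. The generic wrapper below illustrates that difference for a single sub-layer; it is an explanatory sketch, not the forward method of these classes.

def residual_block(x, sublayer, norm, dropout, normalize_before=True):
    # Pre-norm:  LayerNorm -> sublayer -> dropout -> residual add.
    # Post-norm: sublayer -> dropout -> residual add -> LayerNorm.
    residual = x
    if normalize_before:
        x = norm(x)
    x = residual + dropout(sublayer(x))
    if not normalize_before:
        x = norm(x)
    return x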
Example #5
    def __init__(self, state_dim, action_dim,
                 hidden_dim, use_tanh=False,
                 use_sigmoid=False, deterministic=False, use_layernorm=False):
        super(StochasticActor, self).__init__()

        self.state_dim = state_dim
        self.action_dim = action_dim
        self.hidden = hidden_dim
        self.use_tanh = use_tanh
        self.use_sigmoid = use_sigmoid
        self.deterministic = deterministic
        self.use_layernorm = use_layernorm

        # Architecture
        self.input = nn.Linear(in_features=self.state_dim, out_features=self.hidden)
        self.hidden_1 = nn.Linear(in_features=self.hidden, out_features=self.hidden*2)
        self.hidden_2 = nn.Linear(in_features=self.hidden*2, out_features=self.hidden*2)
        self.output_mu = nn.Linear(in_features=self.hidden*2, out_features=self.action_dim)
        self.output_logstd = nn.Linear(in_features=self.hidden*2, out_features=self.action_dim)

        if self.use_layernorm:
            self.ln1 = LayerNorm(self.hidden)
            self.ln2 = LayerNorm(self.hidden*2)
            self.ln3 = LayerNorm(self.hidden*2)

        # Leaky ReLU activation function
        self.lrelu = nn.LeakyReLU()

        # Output activation function (tanh)
        self.tanh = nn.Tanh()

        # Output activation function (sigmoid)
        self.sigmoid = nn.Sigmoid()

        # Initialize the weights with xavier initialization
        nn.init.xavier_uniform_(self.input.weight)
        nn.init.xavier_uniform_(self.hidden_1.weight)
        nn.init.xavier_uniform_(self.hidden_2.weight)
        nn.init.xavier_uniform_(self.output_mu.weight)
        nn.init.xavier_uniform_(self.output_logstd.weight)
Example #6
    def __init__(self,
                 idim,
                 selfattention_layer_type="selfattn",
                 attention_dim=256,
                 attention_heads=4,
                 linear_units=2048,
                 num_blocks=6,
                 dropout_rate=0.1,
                 positional_dropout_rate=0.1,
                 attention_dropout_rate=0.0,
                 input_layer="conv2d",
                 pos_enc_class=PositionalEncoding,
                 normalize_before=True,
                 concat_after=False,
                 positionwise_layer_type="linear",
                 positionwise_conv_kernel_size=1):
        super(Encoder, self).__init__()

        self.conv_subsampling_factor = 1
        # Note: as written, `input_layer` must already be an nn.Module instance
        # (a string such as the "conv2d" default cannot go into nn.Sequential).
        self.embed = torch.nn.Sequential(
            input_layer,
            pos_enc_class(attention_dim, positional_dropout_rate),
        )
        self.normalize_before = normalize_before
        positionwise_layer, positionwise_layer_args = self.get_positionwise_layer(
            positionwise_layer_type,
            attention_dim,
            linear_units,
            dropout_rate,
            positionwise_conv_kernel_size,
        )
        self.encoders = repeat(
            num_blocks,
            lambda lnum: EncoderLayer(
                attention_dim,
                MultiHeadedAttention(attention_heads, attention_dim,
                                     attention_dropout_rate),
                positionwise_layer(*positionwise_layer_args),
                dropout_rate,
                normalize_before,
                concat_after,
            ),
        )
        if self.normalize_before:
            self.after_norm = LayerNorm(attention_dim)
Example #7
    def __init__(self,
                 idim,
                 n_layers=2,
                 n_chans=384,
                 kernel_size=3,
                 bias=True,
                 dropout_rate=0.5):
        """
        Initialize duration predictor module.

        Args:
            idim (int): Input dimension.
            n_layers (int, optional): Number of convolutional layers.
            n_chans (int, optional): Number of channels of convolutional layers.
            kernel_size (int, optional): Kernel size of convolutional layers.
            bias (bool, optional): Whether to use bias in convolutional layers.
            dropout_rate (float, optional): Dropout rate.
        """
        super().__init__()
        self.conv = torch.nn.ModuleList()
        for idx in range(n_layers):
            in_chans = idim if idx == 0 else n_chans
            self.conv += [
                torch.nn.Sequential(
                    torch.nn.Conv1d(in_chans, n_chans, kernel_size, stride=1,
                                    padding=(kernel_size - 1) // 2, bias=bias),
                    torch.nn.ReLU(),
                    LayerNorm(n_chans, dim=1),
                    torch.nn.Dropout(dropout_rate),
                )
            ]
        self.linear = torch.nn.Linear(n_chans, 1)
Example #8
    def __init__(self,
                 idim,
                 n_layers=2,
                 n_chans=384,
                 kernel_size=3,
                 dropout_rate=0.1,
                 offset=1.0):
        """
        Initialize duration predictor module.

        Args:
            idim (int): Input dimension.
            n_layers (int, optional): Number of convolutional layers.
            n_chans (int, optional): Number of channels of convolutional layers.
            kernel_size (int, optional): Kernel size of convolutional layers.
            dropout_rate (float, optional): Dropout rate.
            offset (float, optional): Offset value to avoid nan in log domain.

        """
        super(DurationPredictor, self).__init__()
        self.offset = offset
        self.conv = torch.nn.ModuleList()
        for idx in range(n_layers):
            in_chans = idim if idx == 0 else n_chans
            self.conv += [
                torch.nn.Sequential(
                    torch.nn.Conv1d(
                        in_chans,
                        n_chans,
                        kernel_size,
                        stride=1,
                        padding=(kernel_size - 1) // 2,
                    ),
                    torch.nn.ReLU(),
                    LayerNorm(n_chans, dim=1),
                    torch.nn.Dropout(dropout_rate),
                )
            ]
        self.linear = torch.nn.Linear(n_chans, 1)
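Both duration predictors apply their convolutions over the channel axis, so a caller has to move between (batch, time, idim) and (batch, channels, time). The sketch below shows that data flow as an assumed usage pattern (masking and the log-domain offset handling are omitted); it is not the module's actual forward method.

import torch

def duration_predictor_forward(dp, xs):
    # xs: (B, T, idim) input features.
    xs = xs.transpose(1, -1)      # (B, idim, T) so Conv1d sees channels first
    for block in dp.conv:
        xs = block(xs)            # (B, n_chans, T)
    return dp.linear(xs.transpose(1, -1)).squeeze(-1)  # (B, T) predicted durations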
Example #9
 def __init__(
         self,
         odim,
         selfattention_layer_type="selfattn",
         attention_dim=256,
         attention_heads=4,
         conv_wshare=4,
         conv_kernel_length=11,
         conv_usebias=False,
         linear_units=2048,
         num_blocks=6,
         dropout_rate=0.1,
         positional_dropout_rate=0.1,
         self_attention_dropout_rate=0.0,
         src_attention_dropout_rate=0.0,
         input_layer="embed",
         use_output_layer=True,
         pos_enc_class=PositionalEncoding,
         normalize_before=True,
         concat_after=False,
 ):
     """Construct an Decoder object."""
     torch.nn.Module.__init__(self)
     if input_layer == "embed":
         self.embed = torch.nn.Sequential(
             torch.nn.Embedding(odim, attention_dim),
             pos_enc_class(attention_dim, positional_dropout_rate),
         )
     elif input_layer == "linear":
         self.embed = torch.nn.Sequential(
             torch.nn.Linear(odim, attention_dim),
             torch.nn.LayerNorm(attention_dim),
             torch.nn.Dropout(dropout_rate),
             torch.nn.ReLU(),
             pos_enc_class(attention_dim, positional_dropout_rate),
         )
     elif isinstance(input_layer, torch.nn.Module):
         self.embed = torch.nn.Sequential(
             input_layer, pos_enc_class(attention_dim, positional_dropout_rate)
         )
     else:
          raise NotImplementedError(
              "only `embed`, `linear`, or torch.nn.Module is supported."
          )
     self.normalize_before = normalize_before
     self.decoders = repeat(
         num_blocks,
         lambda lnum: DecoderLayer(
             attention_dim,
             MultiHeadedAttention(attention_heads, attention_dim, self_attention_dropout_rate),
             MultiHeadedAttention(attention_heads, attention_dim, src_attention_dropout_rate),
             PositionwiseFeedForward(attention_dim, linear_units, dropout_rate),
             dropout_rate,
             normalize_before,
             concat_after,
         ),
     )
     self.selfattention_layer_type = selfattention_layer_type
     if self.normalize_before:
         self.after_norm = LayerNorm(attention_dim)
     if use_output_layer:
         self.output_layer = torch.nn.Linear(attention_dim, odim)
     else:
         self.output_layer = None
Example #10
    def __init__(self,
                 idim,
                 attention_dim=256,
                 attention_heads=4,
                 linear_units=2048,
                 num_blocks=6,
                 dropout_rate=0.1,
                 positional_dropout_rate=0.1,
                 attention_dropout_rate=0.0,
                 input_layer="conv2d",
                 normalize_before=True,
                 concat_after=False,
                 positionwise_conv_kernel_size=1,
                 macaron_style=False,
                 use_cnn_module=False,
                 cnn_module_kernel=31,
                 zero_triu=False):
        """Construct a Conformer object."""
        super(Conformer, self).__init__()

        activation = Swish()

        self.conv_subsampling_factor = 1

        if isinstance(input_layer, torch.nn.Module):
            self.embed = torch.nn.Sequential(
                input_layer,
                RelPositionalEncoding(attention_dim, positional_dropout_rate),
            )
        elif input_layer is None:
            self.embed = torch.nn.Sequential(
                RelPositionalEncoding(attention_dim, positional_dropout_rate))
        else:
            raise ValueError("unknown input_layer: " + input_layer)

        self.normalize_before = normalize_before

        # self-attention module definition
        encoder_selfattn_layer = RelPositionMultiHeadedAttention
        encoder_selfattn_layer_args = (attention_heads, attention_dim,
                                       attention_dropout_rate, zero_triu)

        # feed-forward module definition
        positionwise_layer = MultiLayeredConv1d
        positionwise_layer_args = (
            attention_dim,
            linear_units,
            positionwise_conv_kernel_size,
            dropout_rate,
        )

        # convolution module definition
        convolution_layer = ConvolutionModule
        convolution_layer_args = (attention_dim, cnn_module_kernel, activation)

        self.encoders = repeat(
            num_blocks, lambda lnum: EncoderLayer(
                attention_dim,
                encoder_selfattn_layer(*encoder_selfattn_layer_args),
                positionwise_layer(*positionwise_layer_args),
                positionwise_layer(*positionwise_layer_args)
                if macaron_style else None,
                convolution_layer(*convolution_layer_args) if use_cnn_module
                else None, dropout_rate, normalize_before, concat_after))
        if self.normalize_before:
            self.after_norm = LayerNorm(attention_dim)
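Note that, as written, this constructor accepts only an nn.Module or None for input_layer, so the "conv2d" default string would fall through to the ValueError branch. A minimal, hypothetical instantiation (arbitrary dimensions, and assuming the class and its dependencies are importable) might look like this:

import torch

frontend = torch.nn.Linear(80, 256)   # hypothetical projection to attention_dim
encoder = Conformer(idim=80,
                    attention_dim=256,
                    attention_heads=4,
                    num_blocks=6,
                    input_layer=frontend,
                    macaron_style=True,
                    use_cnn_module=True)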