Example 1
    def __init__(self,
                 d_model=512,
                 nhead=8,
                 num_encoder_layers=6,
                 num_decoder_layers=6,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu",
                 normalize_before=False,
                 return_intermediate_dec=False):
        """Transformer with a stack of encoder and decoder layers.

        `normalize_before` selects pre-norm instead of the default post-norm;
        in the pre-norm case a final LayerNorm is appended to the encoder.
        """
        super().__init__()

        encoder_layer = TransformerEncoderLayer(d_model, nhead,
                                                dim_feedforward, dropout,
                                                activation, normalize_before)
        encoder_norm = dg.LayerNorm(d_model) if normalize_before else None
        self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers,
                                          encoder_norm)

        decoder_layer = TransformerDecoderLayer(d_model, nhead,
                                                dim_feedforward, dropout,
                                                activation, normalize_before)
        decoder_norm = dg.LayerNorm(d_model)
        self.decoder = TransformerDecoder(
            decoder_layer,
            num_decoder_layers,
            decoder_norm,
            return_intermediate=return_intermediate_dec)

        self.d_model = d_model
        self.nhead = nhead
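
A minimal sketch (assuming the Paddle 1.x fluid dygraph API used above; values and shapes are illustrative) of the LayerNorm placement that `normalize_before` controls: pre-norm keeps a final encoder LayerNorm, post-norm drops it.

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg

with fluid.dygraph.guard():
    d_model = 512
    normalize_before = True                      # illustrative value
    encoder_norm = dg.LayerNorm(d_model) if normalize_before else None
    x = dg.to_variable(np.random.randn(2, 10, d_model).astype("float32"))
    if encoder_norm is not None:
        x = encoder_norm(x)                      # normalizes over the last (d_model) axis
    print(x.shape)                               # [2, 10, 512]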
Example 2
    def __init__(self, input_size, out_channels, filter_size, dropout=0.1):
        """Duration Predictor block in FastSpeech.

        Args:
            input_size (int): the number of input channels.
            out_channels (int): the number of output channels.
            filter_size (int): the filter size.
            dropout (float, optional): dropout probability. Defaults to 0.1.
        """
        super(DurationPredictor, self).__init__()
        self.input_size = input_size
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.dropout = dropout

        k = math.sqrt(1.0 / self.input_size)
        self.conv1 = Conv1D(
            num_channels=self.input_size,
            num_filters=self.out_channels,
            filter_size=self.filter_size,
            padding=1,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.XavierInitializer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-k, high=k)))
        # data_format='NTC'
        k = math.sqrt(1.0 / self.out_channels)
        self.conv2 = Conv1D(
            num_channels=self.out_channels,
            num_filters=self.out_channels,
            filter_size=self.filter_size,
            padding=1,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.XavierInitializer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-k, high=k)))
        # data_format='NTC'
        self.layer_norm1 = dg.LayerNorm(self.out_channels)
        self.layer_norm2 = dg.LayerNorm(self.out_channels)

        self.weight = fluid.ParamAttr(
            initializer=fluid.initializer.XavierInitializer())
        k = math.sqrt(1.0 / self.out_channels)
        self.bias = fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-k, high=k))

        self.linear = dg.Linear(self.out_channels,
                                1,
                                param_attr=self.weight,
                                bias_attr=self.bias)
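
As a hedged aside, the fan-in-based bias initialization used above (uniform in [-k, k] with k = sqrt(1 / fan_in)) can be reproduced on a plain dg.Linear; the sizes below are illustrative, not taken from the model.

import math
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg

with fluid.dygraph.guard():
    fan_in, fan_out = 256, 1                     # illustrative sizes
    k = math.sqrt(1.0 / fan_in)
    linear = dg.Linear(
        fan_in, fan_out,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.XavierInitializer()),
        bias_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-k, high=k)))
    x = dg.to_variable(np.random.randn(4, fan_in).astype("float32"))
    print(linear(x).shape)                       # [4, 1]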
Example 3
def _build_ln(n_in, name):
    # Note: the bias is initialized to 1.0 here, mirroring the scale; LayerNorm
    # bias is conventionally initialized to 0, so this matters only if
    # pretrained weights do not overwrite it.
    return D.LayerNorm(
        normalized_shape=n_in,
        param_attr=F.ParamAttr(
            name='%s_layer_norm_scale' % name if name is not None else None,
            initializer=F.initializer.Constant(1.)),
        bias_attr=F.ParamAttr(
            name='%s_layer_norm_bias' % name if name is not None else None,
            initializer=F.initializer.Constant(1.)))
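
A small usage sketch of _build_ln above, assuming `import paddle.fluid as F` and `import paddle.fluid.dygraph as D` as in the snippet; the hidden size, parameter name, and tensor shape are illustrative.

import numpy as np
import paddle.fluid as F
import paddle.fluid.dygraph as D

with F.dygraph.guard():
    ln = _build_ln(768, name='encoder_layer_0_post_att')   # hypothetical name
    x = D.to_variable(np.random.randn(2, 128, 768).astype("float32"))
    print(ln(x).shape)                                      # [2, 128, 768]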
Example 4
    def __init__(self,
                 d_in,
                 num_hidden,
                 filter_size,
                 padding=0,
                 use_cudnn=True,
                 dropout=0.1):
        """A two-feed-forward-layer module.

        Args:
            d_in (int): the number of input channels.
            num_hidden (int): the hidden size of the feed-forward layer.
            filter_size (int): the filter size of the Conv1D layers.
            padding (int, optional): the padding size of the Conv1D layers. Defaults to 0.
            use_cudnn (bool, optional): whether to use cuDNN in the Conv1D layers. Defaults to True.
            dropout (float, optional): dropout probability. Defaults to 0.1.
        """
        super(PositionwiseFeedForward, self).__init__()
        self.num_hidden = num_hidden
        self.use_cudnn = use_cudnn
        self.dropout = dropout

        k = math.sqrt(1.0 / d_in)
        self.w_1 = Conv1D(
            num_channels=d_in,
            num_filters=num_hidden,
            filter_size=filter_size,
            padding=padding,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.XavierInitializer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-k, high=k)),
            use_cudnn=use_cudnn)
        k = math.sqrt(1.0 / num_hidden)
        self.w_2 = Conv1D(
            num_channels=num_hidden,
            num_filters=d_in,
            filter_size=filter_size,
            padding=padding,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.XavierInitializer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-k, high=k)),
            use_cudnn=use_cudnn)
        self.layer_norm = dg.LayerNorm(d_in)
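
A hedged sketch of the post-norm residual pattern a block like this typically implements, i.e. LayerNorm(x + Dropout(FFN(x))); the forward pass is not shown above, plain dg.Linear layers stand in for the Conv1D wrappers, and all sizes are illustrative.

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers

with fluid.dygraph.guard():
    d_in, num_hidden = 256, 1024                 # illustrative sizes
    w_1 = dg.Linear(d_in, num_hidden)
    w_2 = dg.Linear(num_hidden, d_in)
    layer_norm = dg.LayerNorm(d_in)
    x = dg.to_variable(np.random.randn(2, 10, d_in).astype("float32"))
    out = w_2(layers.relu(w_1(x)))               # two-layer feed-forward
    out = layers.dropout(out, dropout_prob=0.1)
    out = layer_norm(x + out)                    # residual connection, then LayerNorm
    print(out.shape)                             # [2, 10, 256]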
Example 5
    def __init__(self,
                 num_hidden,
                 d_k,
                 d_q,
                 num_head=4,
                 is_bias=False,
                 dropout=0.1,
                 is_concat=True):
        """Multihead Attention.

        Args:
            num_hidden (int): the hidden size of the network.
            d_k (int): the dimension of the keys in multihead attention.
            d_q (int): the dimension of the queries in multihead attention.
            num_head (int, optional): the number of attention heads. Defaults to 4.
            is_bias (bool, optional): whether the linear layers have a bias. Defaults to False.
            dropout (float, optional): dropout probability of the FFTBlock. Defaults to 0.1.
            is_concat (bool, optional): whether to concatenate the query with the attention result. Defaults to True.
        """
        super(MultiheadAttention, self).__init__()
        self.num_hidden = num_hidden
        self.num_head = num_head
        self.d_k = d_k
        self.d_q = d_q
        self.dropout = dropout
        self.is_concat = is_concat

        self.key = Linear(num_hidden, num_head * d_k, is_bias=is_bias)
        self.value = Linear(num_hidden, num_head * d_k, is_bias=is_bias)
        self.query = Linear(num_hidden, num_head * d_q, is_bias=is_bias)

        self.scal_attn = ScaledDotProductAttention(d_k)

        if self.is_concat:
            self.fc = Linear(num_head * d_q * 2, num_hidden)
        else:
            self.fc = Linear(num_head * d_q, num_hidden)

        self.layer_norm = dg.LayerNorm(num_hidden)
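
A hedged shape sketch of why `fc` takes `num_head * d_q * 2` inputs when `is_concat` is True: the attention result is concatenated with the original query along the feature axis before the output projection. All sizes below are illustrative, and the full forward pass is not shown above.

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers

with fluid.dygraph.guard():
    batch, seq_len, num_head, d_q, num_hidden = 2, 5, 4, 64, 256   # illustrative
    result = dg.to_variable(np.random.randn(batch, seq_len, num_head * d_q).astype("float32"))
    query = dg.to_variable(np.random.randn(batch, seq_len, num_head * d_q).astype("float32"))
    concat = layers.concat([result, query], axis=2)   # [2, 5, num_head * d_q * 2]
    fc = dg.Linear(num_head * d_q * 2, num_hidden)
    print(fc(concat).shape)                           # [2, 5, 256]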
Example 6
def get_activation_norm_layer(num_features, norm_type, input_dim, **norm_params):
    """
    Return an activation normalization layer.
    """
    input_dim = max(input_dim, 1)
    assert input_dim == 1 or input_dim == 2, 'Only 1D and 2D inputs are supported currently'

    if norm_type == 'none' or norm_type == '':
        norm_layer = None
    elif norm_type == 'batch':
        norm_layer = dg.BatchNorm(num_features, **norm_params)
    elif norm_type == 'instance':
        affine = norm_params.pop('affine', True)
        if not affine:
            # fluid's InstanceNorm has no `affine` flag; emulate affine=False by
            # disabling the learnable scale and bias parameters.
            norm_params['param_attr'] = False
            norm_params['bias_attr'] = False
        norm_layer = dg.InstanceNorm(num_features, **norm_params)
    elif norm_type == 'sync_batch':
        # `affine` is popped so that it is not forwarded to dg.BatchNorm.
        affine = norm_params.pop('affine', True)
        norm_layer = dg.BatchNorm(num_features, **norm_params)
        # Note: sync_batch_norm only takes effect when the BuildStrategy is
        # passed to a compiled program; setting it on a temporary object, as
        # done here, does not enable it by itself.
        F.BuildStrategy().sync_batch_norm = True
    elif norm_type == 'layer':
        norm_layer = dg.LayerNorm(num_features, **norm_params)
    elif norm_type == 'layer_2d':
        raise NotImplementedError()
    elif norm_type == 'adaptive':
        norm_layer = AdaptiveNorm(num_features, **norm_params)
    elif norm_type == 'spatially_adaptive':
        if input_dim != 2:
            raise ValueError("Spatially adaptive normalization layers only support 2D inputs")
        norm_layer = SpatiallyAdaptiveNorm(num_features, **norm_params)
    elif norm_type == 'hyper_spatially_adaptive':
        if input_dim != 2:
            raise ValueError("Spatially adaptive normalization layers only support 2D inputs")
        norm_layer = HyperSpatiallyAdaptiveNorm(num_features, **norm_params)
    else:
        raise ValueError("Activation norm layer %s is not recognized" % norm_type)

    return norm_layer
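
A small usage sketch of get_activation_norm_layer, assuming the same `dg` dygraph import as above; the 'batch' branch maps directly onto dg.BatchNorm, and the channel count and input shape are illustrative.

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg

with fluid.dygraph.guard():
    norm = get_activation_norm_layer(64, 'batch', input_dim=2)   # dg.BatchNorm(64)
    x = dg.to_variable(np.random.randn(2, 64, 16, 16).astype("float32"))
    print(norm(x).shape)                                         # [2, 64, 16, 16]

    # 'none' (or '') returns None, so callers must guard against a missing layer.
    assert get_activation_norm_layer(64, 'none', input_dim=2) is None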