def __init__(self, d_model=512, nhead=8, num_encoder_layers=6,
             num_decoder_layers=6, dim_feedforward=2048, dropout=0.1,
             activation="relu", normalize_before=False,
             return_intermediate_dec=False):
    """Transformer with stacked encoder and decoder.

    Args:
        d_model (int, optional): the feature dimension. Defaults to 512.
        nhead (int, optional): the number of attention heads. Defaults to 8.
        num_encoder_layers (int, optional): the number of encoder layers. Defaults to 6.
        num_decoder_layers (int, optional): the number of decoder layers. Defaults to 6.
        dim_feedforward (int, optional): the hidden size of the feed-forward sublayers. Defaults to 2048.
        dropout (float, optional): dropout probability. Defaults to 0.1.
        activation (str, optional): activation of the feed-forward sublayers. Defaults to "relu".
        normalize_before (bool, optional): whether to apply LayerNorm before each sublayer (pre-norm). Defaults to False.
        return_intermediate_dec (bool, optional): whether the decoder returns the output of every layer. Defaults to False.
    """
    super().__init__()

    encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward,
                                            dropout, activation, normalize_before)
    # A final encoder LayerNorm is only needed in the pre-norm configuration.
    encoder_norm = dg.LayerNorm(d_model) if normalize_before else None
    self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers,
                                      encoder_norm)

    decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward,
                                            dropout, activation, normalize_before)
    decoder_norm = dg.LayerNorm(d_model)
    self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers,
                                      decoder_norm,
                                      return_intermediate=return_intermediate_dec)

    self.d_model = d_model
    self.nhead = nhead
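# Usage sketch for the constructor above, hedged: it assumes the method belongs to a
# dygraph `Transformer` Layer (the class name is illustrative, not confirmed by the
# snippet), that `dg` is paddle.fluid.dygraph, and that the referenced
# TransformerEncoder/TransformerDecoder classes live in the same module.
import paddle.fluid.dygraph as dg

with dg.guard():
    transformer = Transformer(d_model=256, nhead=8,
                              num_encoder_layers=3, num_decoder_layers=3,
                              return_intermediate_dec=True)
    # transformer.encoder and transformer.decoder are now 3-layer stacks built
    # around a 256-dimensional model width.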
def __init__(self, input_size, out_channels, filter_size, dropout=0.1):
    """Duration Predictor block in FastSpeech.

    Args:
        input_size (int): the channel number of input.
        out_channels (int): the output channel number.
        filter_size (int): the filter size.
        dropout (float, optional): dropout probability. Defaults to 0.1.
    """
    super(DurationPredictor, self).__init__()
    self.input_size = input_size
    self.out_channels = out_channels
    self.filter_size = filter_size
    self.dropout = dropout

    k = math.sqrt(1.0 / self.input_size)
    self.conv1 = Conv1D(
        num_channels=self.input_size,
        num_filters=self.out_channels,
        filter_size=self.filter_size,
        padding=1,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.XavierInitializer()),
        bias_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-k, high=k)))
        #data_format='NTC')
    k = math.sqrt(1.0 / self.out_channels)
    self.conv2 = Conv1D(
        num_channels=self.out_channels,
        num_filters=self.out_channels,
        filter_size=self.filter_size,
        padding=1,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.XavierInitializer()),
        bias_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-k, high=k)))
        #data_format='NTC')
    self.layer_norm1 = dg.LayerNorm(self.out_channels)
    self.layer_norm2 = dg.LayerNorm(self.out_channels)

    self.weight = fluid.ParamAttr(
        initializer=fluid.initializer.XavierInitializer())
    k = math.sqrt(1.0 / self.out_channels)
    self.bias = fluid.ParamAttr(
        initializer=fluid.initializer.Uniform(low=-k, high=k))

    self.linear = dg.Linear(self.out_channels, 1,
                            param_attr=self.weight,
                            bias_attr=self.bias)
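# Usage sketch for DurationPredictor, hedged: it assumes `fluid`/`dg` are the Paddle
# 1.x fluid and dygraph namespaces and that Conv1D is the project's 1-D convolution
# wrapper; the sizes below are illustrative only.
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg

with dg.guard():
    predictor = DurationPredictor(input_size=256, out_channels=256,
                                  filter_size=3, dropout=0.1)
    # Two width-3 convolutions, each followed by a LayerNorm, then a Linear head
    # that maps every frame to a single predicted duration value.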
def _build_ln(n_in, name):
    """Build a LayerNorm over the last `n_in` features, with optionally named scale/bias parameters."""
    return D.LayerNorm(
        normalized_shape=n_in,
        param_attr=F.ParamAttr(
            name='%s_layer_norm_scale' % name if name is not None else None,
            initializer=F.initializer.Constant(1.)),
        bias_attr=F.ParamAttr(
            name='%s_layer_norm_bias' % name if name is not None else None,
            initializer=F.initializer.Constant(1.)),
    )
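# Usage sketch for _build_ln, hedged: it assumes the module aliases `D` for
# paddle.fluid.dygraph and `F` for paddle.fluid; the layer name is illustrative.
import paddle.fluid as F
import paddle.fluid.dygraph as D

with D.guard():
    ln = _build_ln(768, name='encoder_layer_0_post_att')
    # Creates parameters named 'encoder_layer_0_post_att_layer_norm_scale' and
    # 'encoder_layer_0_post_att_layer_norm_bias'; passing name=None leaves the
    # parameters auto-named.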
def __init__(self, d_in, num_hidden, filter_size, padding=0, use_cudnn=True, dropout=0.1):
    """A two-feed-forward-layer module.

    Args:
        d_in (int): the size of the input channel.
        num_hidden (int): the size of the hidden layer in the network.
        filter_size (int): the filter size of the Conv.
        padding (int, optional): the padding size of the Conv. Defaults to 0.
        use_cudnn (bool, optional): whether to use cudnn in the Conv. Defaults to True.
        dropout (float, optional): dropout probability. Defaults to 0.1.
    """
    super(PositionwiseFeedForward, self).__init__()
    self.num_hidden = num_hidden
    self.use_cudnn = use_cudnn
    self.dropout = dropout

    k = math.sqrt(1.0 / d_in)
    self.w_1 = Conv1D(
        num_channels=d_in,
        num_filters=num_hidden,
        filter_size=filter_size,
        padding=padding,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.XavierInitializer()),
        bias_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-k, high=k)),
        use_cudnn=use_cudnn)
    k = math.sqrt(1.0 / num_hidden)
    self.w_2 = Conv1D(
        num_channels=num_hidden,
        num_filters=d_in,
        filter_size=filter_size,
        padding=padding,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.XavierInitializer()),
        bias_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-k, high=k)),
        use_cudnn=use_cudnn)
    self.layer_norm = dg.LayerNorm(d_in)
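# Usage sketch for PositionwiseFeedForward, hedged: it assumes Paddle 1.x fluid
# dygraph and the project's Conv1D wrapper; the sizes below are illustrative (a
# 256-d model with a 1024-d hidden layer and kernel size 1).
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg

with dg.guard():
    ffn = PositionwiseFeedForward(d_in=256, num_hidden=1024, filter_size=1,
                                  padding=0, dropout=0.1)
    # w_1 expands 256 -> 1024, w_2 projects 1024 -> 256, and layer_norm
    # normalizes over the 256-dimensional channel axis.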
def __init__(self, num_hidden, d_k, d_q, num_head=4, is_bias=False, dropout=0.1, is_concat=True):
    """Multihead Attention.

    Args:
        num_hidden (int): the number of hidden units in the network.
        d_k (int): the dim of the key in multihead attention.
        d_q (int): the dim of the query in multihead attention.
        num_head (int, optional): the number of attention heads. Defaults to 4.
        is_bias (bool, optional): whether to use bias in the linear layers. Defaults to False.
        dropout (float, optional): dropout probability of the FFTBlock. Defaults to 0.1.
        is_concat (bool, optional): whether to concatenate the query with the attention result. Defaults to True.
    """
    super(MultiheadAttention, self).__init__()
    self.num_hidden = num_hidden
    self.num_head = num_head
    self.d_k = d_k
    self.d_q = d_q
    self.dropout = dropout
    self.is_concat = is_concat

    self.key = Linear(num_hidden, num_head * d_k, is_bias=is_bias)
    self.value = Linear(num_hidden, num_head * d_k, is_bias=is_bias)
    self.query = Linear(num_hidden, num_head * d_q, is_bias=is_bias)

    self.scal_attn = ScaledDotProductAttention(d_k)

    if self.is_concat:
        self.fc = Linear(num_head * d_q * 2, num_hidden)
    else:
        self.fc = Linear(num_head * d_q, num_hidden)

    self.layer_norm = dg.LayerNorm(num_hidden)
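# Usage sketch for MultiheadAttention, hedged: it assumes the custom Linear and
# ScaledDotProductAttention classes referenced above are importable from the same
# module; the dims below are illustrative (4 heads of 64-d keys/queries over a
# 256-d hidden size).
import paddle.fluid.dygraph as dg

with dg.guard():
    attention = MultiheadAttention(num_hidden=256, d_k=64, d_q=64,
                                   num_head=4, is_concat=True)
    # With is_concat=True the output projection maps the concatenated
    # [attention result, query] of width 4 * 64 * 2 back to num_hidden=256.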
def get_activation_norm_layer(num_features, norm_type, input_dim, **norm_params):
    """ Return an activation normalization layer. """
    input_dim = max(input_dim, 1)
    assert input_dim == 2 or input_dim == 1, 'Only 1D and 2D inputs are supported currently'

    if norm_type == 'none' or norm_type == '':
        norm_layer = None
    elif norm_type == 'batch':
        norm_layer = dg.BatchNorm(num_features, **norm_params)
    elif norm_type == 'instance':
        affine = norm_params.pop('affine', True)
        if not affine:
            norm_params['param_attr'] = False
            norm_params['bias_attr'] = False
        norm_layer = dg.InstanceNorm(num_features, **norm_params)
        #affine=affine, **norm_params)
    elif norm_type == 'sync_batch':
        affine = norm_params.pop('affine', True)
        norm_layer = dg.BatchNorm(num_features, **norm_params)
        F.BuildStrategy().sync_batch_norm = True
    elif norm_type == 'layer':
        norm_layer = dg.LayerNorm(num_features, **norm_params)
    elif norm_type == 'layer_2d':
        raise NotImplementedError()
    elif norm_type == 'adaptive':
        norm_layer = AdaptiveNorm(num_features, **norm_params)
    elif norm_type == 'spatially_adaptive':
        if input_dim != 2:
            raise ValueError("Spatially adaptive normalization layers only support 2D input")
        norm_layer = SpatiallyAdaptiveNorm(num_features, **norm_params)
    elif norm_type == 'hyper_spatially_adaptive':
        if input_dim != 2:
            raise ValueError("Spatially adaptive normalization layers only support 2D input")
        norm_layer = HyperSpatiallyAdaptiveNorm(num_features, **norm_params)
    else:
        raise ValueError("Activation norm layer %s is not recognized" % norm_type)
    return norm_layer
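# Usage sketch for get_activation_norm_layer, hedged: it assumes `dg` is
# paddle.fluid.dygraph and `F` is paddle.fluid; only the built-in 'batch',
# 'instance' and 'none' branches are exercised here, since the adaptive variants
# depend on classes defined elsewhere in the project.
import paddle.fluid as F
import paddle.fluid.dygraph as dg

with dg.guard():
    bn = get_activation_norm_layer(64, 'batch', input_dim=2)        # dg.BatchNorm(64)
    in_norm = get_activation_norm_layer(64, 'instance', input_dim=2)  # dg.InstanceNorm(64)
    no_norm = get_activation_norm_layer(64, 'none', input_dim=2)      # None (no normalization)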