def __init__(self,
             embedding_dim,
             vocab_size,
             share_softmax_weights=False,
             use_bias=True,
             verbose=False,
             name=None):
    """ Stores the configuration of the word embedding layer.

    Args:
        embedding_dim: An int scalar, the dimension of each embedding vector.
        vocab_size: An int scalar, the number of entries in the vocabulary.
        share_softmax_weights: A boolean, whether the embedding table is
            shared with the target softmax weight.
        use_bias: A boolean, whether a bias is used with the target
            softmax weight.
        verbose: A boolean meant to toggle logging of the parameters.
            NOTE: it is not forwarded here; the constructor parameters
            are always extracted with `verbose=False`.
        name: The name of the layer, passed to the parent constructor.
    """
    # `locals()` has to be captured before any other local variable exists,
    # so that only the constructor arguments are recorded.
    self._params = extract_constructor_params(locals(), verbose=False)
    super(WordEmbeddingSharedWeights, self).__init__(name=name)

    # Switches related to the softmax projection.
    self._use_bias = use_bias
    self._share_softmax_weights = share_softmax_weights

    # Size of the embedding table.
    self._vocab_size = vocab_size
    self._embedding_dim = embedding_dim
def __init__(self,
             embedding_dim,
             vocab_size,
             share_softmax_weights=False,
             use_bias=True,
             verbose=False):
    """ Builds the word embedding table and the optional softmax bias.

    Args:
        embedding_dim: An int scalar, the dimension of each embedding vector.
        vocab_size: An int scalar, the number of entries in the vocabulary.
        share_softmax_weights: A boolean, whether the embedding table is
            shared with the target softmax weight.
        use_bias: A boolean, whether a bias is used with the target
            softmax weight.
        verbose: A boolean meant to toggle logging of the parameters.
            NOTE: it is not forwarded here; the constructor parameters
            are always extracted with `verbose=False`.
    """
    # `locals()` has to be captured before any other local variable exists.
    self._params = extract_constructor_params(locals(), verbose=False)
    super(WordEmbeddingSharedWeights, self).__init__()

    self._use_bias = use_bias
    self._share_softmax_weights = share_softmax_weights
    self._vocab_size = vocab_size
    self._embedding_dim = embedding_dim

    # Embedding table of shape (vocab_size, embedding_dim), drawn from a
    # normal distribution with mean 0 and std embedding_dim^-0.5.
    self._shared_weights = nn.Parameter(
        nn.init.normal_(
            torch.empty(vocab_size, embedding_dim),
            mean=0.,
            std=embedding_dim ** -0.5),
        requires_grad=True)

    # A zero-initialized bias exists only when the table doubles as the
    # softmax weight AND a bias was requested; otherwise it stays None.
    if self._share_softmax_weights and self._use_bias:
        self._bias = nn.Parameter(torch.zeros(vocab_size), requires_grad=True)
    else:
        self._bias = None
def __init__(self,
             embedding_dim,
             channels=256,
             kernel_size=3,
             strides=2,
             layer_norm=True,
             verbose=False,
             name=None):
    """ Stores the configuration of the audio feature subsampling layer.

    Args:
        embedding_dim: An int scalar, the embedding dimension.
        channels: The channel size of the convolution layer.
        kernel_size: The kernel size of the convolution layer.
        strides: The stride size of the convolution layer.
        layer_norm: Whether to apply layer normalization.
        verbose: A boolean meant to toggle logging of the parameters.
            NOTE: it is not forwarded here; the constructor parameters
            are always extracted with `verbose=True`.
        name: The name of the layer, passed to the parent constructor.
    """
    # `locals()` has to be captured before any other local variable exists.
    self._params = extract_constructor_params(locals(), verbose=True)
    super(AudioConvSubsamplingLayer, self).__init__(name=name)

    # Convolution hyper-parameters.
    self._channels = channels
    self._kernel_size = kernel_size
    self._strides = strides

    # Output projection size and normalization switch.
    self._embedding_dim = embedding_dim
    self._layer_norm = layer_norm
def __init__(self,
             conv_layers,
             dropout=0.0,
             mode="default",
             conv_bias=False,
             verbose=False,
             name=None):
    """ Stores the configuration of wav2vec2's convolutional feature extractor.

    Args:
        conv_layers: A list of convolution layer settings, each in the
            form of [dim, kernel, stride].
        dropout: The dropout rate of each conv layer.
        mode: The mode for the feature extractor. "default" has a single
            group norm with d groups in the first conv block, whereas
            "layer_norm" has layer norms in every block (meant to be used
            with normalize=True).
        conv_bias: Whether to include bias in the conv encoder.
        verbose: A boolean meant to toggle logging of the parameters.
            NOTE: it is not forwarded here; the constructor parameters
            are always extracted with `verbose=True`.
        name: The name of the layer, passed to the parent constructor.
    """
    # `locals()` has to be captured before any other local variable exists.
    self._params = extract_constructor_params(locals(), verbose=True)
    super(Wav2vec2FeatureExtractor, self).__init__(name=name)

    # Scalar options.
    self._mode = mode
    self._dropout = dropout
    self._conv_bias = conv_bias

    # The raw settings are kept; the layer list starts out empty here.
    self._conv_layers_setting = conv_layers
    self._conv_layers = []
def __init__(self,
             dim,
             kernel,
             stride,
             dropout_rate=0.,
             use_bias=False,
             norm_type=None,
             name=None):
    """ Stores the configuration of a single wav2vec2 convolution block.

    Args:
        dim: The output dimension of this convolution layer.
        kernel: The kernel size.
        stride: The stride.
        dropout_rate: The dropout rate.
        use_bias: Whether to include bias in the conv encoder.
        norm_type: The type of layer normalization, "layer" or "group" or None.
        name: The name of this layer, passed to the parent constructor.
    """
    # `locals()` has to be captured before any other local variable exists.
    self._params = extract_constructor_params(locals(), verbose=False)
    super(Wav2vec2ConvBlock, self).__init__(name=name)

    # Regularization / normalization options.
    self._norm_type = norm_type
    self._dropout_rate = dropout_rate

    # Convolution geometry.
    self._use_bias = use_bias
    self._stride = stride
    self._kernel = kernel
    self._dim = dim
def __init__(self,
             vocab_path=None,
             tokens=None,
             max_len=0,
             lowercase=False,
             bos_token="<SEQ_BEG>",
             eos_token="<SEQ_END>",
             unk_token="<UNK>",
             delimiter=" ",
             reverse=False):
    """ Initialize SymbolsMapper

    Args:
        vocab_path: The path to the vocabulary file. Only one of `vocab_path`
            and `tokens` should be provided.
        tokens: The word tokens. Only one of `vocab_path` and `tokens`
            should be provided.
        max_len: The maximum sequence length. Sequence larger than this
            will be truncated.
        lowercase: A bool, whether to lowercase the word tokens.
        bos_token: The begin-of-sentence token.
        eos_token: The end-of-sentence token. If it collides with a
            vocabulary entry, random digits are appended until it is unique.
        unk_token: The token indicating unknown word. Must be non-empty.
        delimiter: Stored as `self.delimiter`; presumably the separator
            between tokens in a text sequence (confirm against callers).
        reverse: A bool, whether to reverse the sequence or not.

    Raises:
        ValueError: If both or neither of `vocab_path` and `tokens` are given.
        AssertionError: If `unk_token` is empty, or if `bos_token` or
            `eos_token` equals `unk_token`.
    """
    if not ((vocab_path is None) ^ (tokens is None)):
        raise ValueError("Either `vocab_path` or `tokens` should be provided.")
    # Take a copy of the argument snapshot before any further local is
    # created, so it can be edited without touching the frame's own dict.
    this_locals = copy.copy(locals())
    if tokens is None:
        with tf.io.gfile.GFile(vocab_path, "r") as fp:
            tokens = [line.strip() for line in fp]
        # Record the loaded tokens instead of the path.
        this_locals["tokens"] = tokens
        this_locals["vocab_path"] = None
    self._params = extract_constructor_params(this_locals, verbose=False)

    # extract tokens
    cleaned_tokens = []
    for t in tokens:
        t = t.strip()
        if not t:
            # Blank entries (e.g. an empty line in the vocabulary file) used
            # to crash on `split()[0]` with an IndexError; skip them instead.
            continue
        if ((t.startswith("'") and t.endswith("'"))
                or (t.startswith('"') and t.endswith('"'))):
            # A quoted entry: keep everything between the quotes.
            word = t[1:-1]
        else:
            # Otherwise only the first whitespace-separated field is the word.
            word = t.split()[0]
        if word:
            cleaned_tokens.append(word)

    assert unk_token, "must provide `unk_token`"
    extra_tokens = [unk_token]
    # add bos
    assert bos_token != unk_token
    extra_tokens.append(bos_token)
    # add eos
    # NOTE(review): this chained comparison checks `eos != unk` and
    # `unk != bos` only; `eos != bos` is never compared. Confirm whether an
    # identical bos/eos token is meant to be allowed.
    assert eos_token != unk_token != bos_token
    # Make the eos token unique with respect to the vocabulary entries.
    while eos_token in cleaned_tokens:
        eos_token += str(random.choice(range(10)))
    extra_tokens.append(eos_token)

    self.vocab = Vocab(tokens=cleaned_tokens,
                       extra_tokens=extra_tokens,
                       lowercase=lowercase)
    self.max_len = max_len
    self.eos_id = self.vocab.map_token_to_id(eos_token)
    self.bos_id = self.vocab.map_token_to_id(bos_token)
    self.unk_id = self.vocab.map_token_to_id(unk_token)
    self.reverse = reverse
    self.delimiter = delimiter
def __init__(self,
             input_depth,
             num_heads,
             num_units,
             attention_key_depth=None,
             attention_value_depth=None,
             output_depth=None,
             attention_dropout_rate=0.1,
             attention_type="dot_product"):
    """ Sets up the multi-head attention layer and its transform layers.

    Args:
        input_depth: The dimension of the input tensor.
        num_heads: An int scalar, the number of heads.
        num_units: An int scalar, the fallback size used whenever one of
            the `*_depth` arguments below is not provided.
        attention_key_depth: An int scalar, the dimension for projected
            attention keys. Falls back to `num_units`.
        attention_value_depth: An int scalar, the dimension for projected
            attention values. Falls back to `num_units`.
        output_depth: An int scalar, the dimension for projected outputs.
            Falls back to `num_units`.
        attention_dropout_rate: A float scalar, the dropout rate for
            attention weight.
        attention_type: A string indicating the attention type.

    Raises:
        ValueError: If the key depth or the value depth is not divisible
            by `num_heads`.
    """
    # `locals()` has to be captured before any other local variable exists.
    self._params = extract_constructor_params(locals(), verbose=False)
    super(MultiHeadAttention, self).__init__()

    self._input_depth = input_depth
    self._num_heads = num_heads
    self._num_units = num_units
    # Any depth that is missing (or otherwise falsy) defaults to `num_units`.
    self._attention_key_depth = attention_key_depth or num_units
    self._attention_value_depth = attention_value_depth or num_units
    self._output_depth = output_depth or num_units
    self._attention_dropout_rate = attention_dropout_rate
    self._attention_type = attention_type

    # Both projected depths must split evenly across the heads.
    if self._attention_key_depth % self._num_heads:
        raise ValueError(
            "query depth ({}) must be divisible by the number of "
            "attention heads ({}).".format(
                self._attention_key_depth, self._num_heads))
    if self._attention_value_depth % self._num_heads:
        raise ValueError(
            "value depth ({}) must be divisible by the number of "
            "attention heads ({}).".format(
                self._attention_value_depth, self._num_heads))

    # The output projection is created eagerly; the q/k/v projections are
    # delegated to `_build_qkv_transform_layer`.
    self._output_transform_layer = MultiHeadDenseLayer(
        input_size=input_depth,
        output_units=self._output_depth,
        num_heads=self._num_heads,
        is_output_transform=True,
        use_bias=True)
    self._build_qkv_transform_layer()
def __init__(self,
             embedding_dim,
             input_dimension=80,
             input_channels=1,
             channels=256,
             kernel_size=3,
             strides=2,
             layer_norm=True,
             name=None):
    """ Initializes the layer for subsample the audio feature.

    Two strided 2-D convolutions are created, optionally with a layer
    normalization for each, followed by a linear projection to
    `embedding_dim`.

    Args:
        embedding_dim: An int scalar, the embedding dimension.
        input_dimension: An int scalar, the dimension of the audio feature.
        input_channels: An int scalar, the number of input channels of the
            audio feature.
        channels: The channel size of the convolution layer.
        kernel_size: The kernel size of the convolution layer.
        strides: The stride size of the convolution layer.
        layer_norm: Whether to apply layer normalization.
        name: The name of the layer. It is only recorded in `self._params`;
            this constructor does not use it otherwise.
    """
    # `locals()` has to be captured before any other local variable exists.
    self._params = extract_constructor_params(locals(), verbose=True)
    super(AudioConvSubsamplingLayer, self).__init__()
    self._input_channels = input_channels
    self._embedding_dim = embedding_dim
    self._channels = channels
    self._kernel_size = kernel_size
    self._layer_norm = layer_norm
    self._strides = strides

    num_pad = self._kernel_size // 2
    self._conv_layer1 = nn.Conv2d(
        self._input_channels, self._channels,
        kernel_size=(self._kernel_size, self._kernel_size),
        stride=(self._strides, self._strides),
        padding=(num_pad, num_pad))
    self._conv_layer2 = nn.Conv2d(
        self._channels, self._channels,
        kernel_size=(self._kernel_size, self._kernel_size),
        stride=(self._strides, self._strides),
        padding=(num_pad, num_pad))
    if self._layer_norm:
        self._norm_layer1 = LayerNorm(self._channels, eps=1e-6)
        self._norm_layer2 = LayerNorm(self._channels, eps=1e-6)

    # Feature size left after the two convolutions, using the Conv2d output
    # formula floor((d + 2 * pad - kernel) / stride) + 1. For odd kernel
    # sizes this equals the previous ceil(d / stride) shortcut; for even
    # kernel sizes the shortcut under-counted and the projection below would
    # not match the convolution output.
    feature_dim = input_dimension
    for _ in range(2):
        feature_dim = (feature_dim + 2 * num_pad - self._kernel_size) // self._strides + 1
    self._dense_layer = nn.Linear(
        feature_dim * self._channels, self._embedding_dim, bias=True)
def __init__(self,
             embedding_dim,
             vocab_size,
             max_positions,
             token_types,
             dropout_rate=0.0,
             epsilon=1e-12,
             name=None):
    """ Stores the configuration of the BERT embedding layer.

    Args:
        embedding_dim: The embedding dimension.
        vocab_size: The size of the vocabulary.
        max_positions: Stored as `_max_positions`; presumably the number of
            positions the position embedding covers (confirm).
        token_types: Stored as `_token_types`; presumably the number of
            token (segment) types (confirm).
        dropout_rate: The dropout rate.
        epsilon: Stored as `_epsilon`; presumably the epsilon of the layer
            normalization (confirm).
        name: The name of the layer, passed to the parent constructor.
    """
    # `locals()` has to be captured before any other local variable exists.
    self._params = extract_constructor_params(locals(), verbose=False)
    super(BertEmbedding, self).__init__(name=name)

    # Numerical / regularization options.
    self._epsilon = epsilon
    self._dropout_rate = dropout_rate

    # Sizes of the embedding tables.
    self._token_types = token_types
    self._max_positions = max_positions
    self._vocab_size = vocab_size
    self._embedding_dim = embedding_dim
def __init__(self, name=None, **kwargs):
    """ Initializes the parameters of the decoder.

    Args:
        name: The name of the layer, passed to the parent constructor.
        **kwargs: Extra keyword arguments. Within this constructor they are
            only captured (through `locals()`) into `self._params`.
    """
    # `locals()` must be read first, while it holds only the arguments.
    self._params = extract_constructor_params(locals(), verbose=False)
    super(Decoder, self).__init__(name=name)
def __init__(self, **kwargs):
    """ Initializes the parameters of the encoder.

    Args:
        **kwargs: Extra keyword arguments. Within this constructor they are
            only captured (through `locals()`) into `self._params`.
    """
    # `locals()` must be read first, while it holds only the arguments.
    self._params = extract_constructor_params(locals(), verbose=False)
    super(Encoder, self).__init__()
def __init__(self, **kwargs):
    """ Records the constructor arguments.

    Args:
        **kwargs: Arbitrary keyword arguments; they are captured (through
            `locals()`) into `self._params`.
    """
    # The snapshot contains `self` and `kwargs`; `extract_constructor_params`
    # decides what is kept.
    self._params = extract_constructor_params(locals(), verbose=False)