Example #1
    def __init__(self,
                 embedding_dim,
                 vocab_size,
                 share_softmax_weights=False,
                 use_bias=True,
                 verbose=False,
                 name=None):
        """ Initializes simple word embedding layer.

        Args:
            embedding_dim: An int scalar, the embedding dimension.
            vocab_size: An int scalar, the size of vocabulary.
            share_softmax_weights: A boolean, whether to share the
                embedding table with the target softmax weights.
            use_bias: A boolean, whether to use a bias with the target
                softmax weights.
            verbose: A boolean, whether to log the parameters.
            name: The name of the layer.
        """
        self._params = extract_constructor_params(locals(), verbose=False)
        super(WordEmbeddingSharedWeights, self).__init__(name=name)
        self._embedding_dim = embedding_dim
        self._vocab_size = vocab_size
        self._share_softmax_weights = share_softmax_weights
        self._use_bias = use_bias
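
Every constructor on this page begins by calling extract_constructor_params(locals(), ...). The helper itself is not shown in these examples; as a minimal sketch, assuming it simply snapshots the constructor arguments (dropping `self`) so they can be reused later, it might look like this (a hypothetical reimplementation, not the library's actual code):

    import logging

    def extract_constructor_params(local_vars, verbose=False):
        # Hypothetical sketch: copy the constructor's arguments out of
        # locals(), dropping `self` and any dunder entries.
        params = {k: v for k, v in local_vars.items()
                  if k != "self" and not k.startswith("__")}
        if verbose:
            logging.info("Constructor parameters: %s", params)
        return params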
Example #2
    def __init__(self,
                 embedding_dim,
                 vocab_size,
                 share_softmax_weights=False,
                 use_bias=True,
                 verbose=False):
        """ Initializes simple word embedding layer.

        Args:
            embedding_dim: An int scalar, the embedding dimension.
            vocab_size: An int scalar, the size of vocabulary.
            share_softmax_weights: A boolean, whether to share the
                embedding table with the target softmax weights.
            use_bias: A boolean, whether to use a bias with the target
                softmax weights.
            verbose: A boolean, whether to log the parameters.
        """
        self._params = extract_constructor_params(locals(), verbose=False)
        super(WordEmbeddingSharedWeights, self).__init__()
        self._embedding_dim = embedding_dim
        self._vocab_size = vocab_size
        self._share_softmax_weights = share_softmax_weights
        self._use_bias = use_bias
        self._shared_weights = nn.Parameter(nn.init.normal_(
            torch.empty(vocab_size, embedding_dim),
            mean=0.,
            std=embedding_dim**-0.5),
                                            requires_grad=True)
        self._bias = None
        if self._share_softmax_weights and self._use_bias:
            self._bias = nn.Parameter(torch.zeros(vocab_size),
                                      requires_grad=True)
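
This PyTorch variant stores one (vocab_size, embedding_dim) matrix so the input embedding and the output softmax projection can share parameters. A minimal sketch of the weight-tying idea these fields support, assuming a forward pass that is not shown in the source:

    import torch
    import torch.nn.functional as F

    vocab_size, embedding_dim = 1000, 512
    shared = torch.nn.Parameter(torch.empty(vocab_size, embedding_dim).normal_(
        mean=0., std=embedding_dim ** -0.5))
    bias = torch.nn.Parameter(torch.zeros(vocab_size))

    token_ids = torch.tensor([[3, 7, 42]])
    embedded = F.embedding(token_ids, shared)  # (1, 3, embedding_dim) lookup
    logits = F.linear(embedded, shared, bias)  # softmax reuses the same matrix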
Example #3
    def __init__(self,
                 embedding_dim,
                 channels=256,
                 kernel_size=3,
                 strides=2,
                 layer_norm=True,
                 verbose=False,
                 name=None):
        """ Initializes the layer for subsample the audio feature.

        Args:
            embedding_dim: An int scalar, the embedding dimension.
            channels: The channel size of the convolution layer.
            kernel_size: The kernel size of the convolution layer.
            strides: The stride size of the convolution layer.
            layer_norm: Whether to apply layer normalization.
            verbose: A boolean, whether to log the parameters.
            name: The name of the layer.
        """
        self._params = extract_constructor_params(locals(), verbose=True)
        super(AudioConvSubsamplingLayer, self).__init__(name=name)
        self._embedding_dim = embedding_dim
        self._channels = channels
        self._kernel_size = kernel_size
        self._layer_norm = layer_norm
        self._strides = strides
Example #4
    def __init__(self,
                 conv_layers,
                 dropout=0.0,
                 mode="default",
                 conv_bias=False,
                 verbose=False,
                 name=None):
        """ Initializes wav2vec2's convolution layers.

        Args:
            conv_layers: A list of convolution layers, each of which is in form of [dim, kernel, stride].
            dropout: The dropout rate of each conv layer.
            mode: The mode of the feature extractor. "default" uses a single
                group norm with d groups in the first conv block, whereas
                "layer_norm" uses layer norms in every block (meant to be
                used with normalize=True).
            conv_bias: Whether to include bias in conv encoder.
            verbose: A boolean, whether to log the parameters.
            name: The name of the layer.
        """
        self._params = extract_constructor_params(locals(), verbose=True)
        super(Wav2vec2FeatureExtractor, self).__init__(name=name)
        self._conv_layers_setting = conv_layers
        self._dropout = dropout
        self._mode = mode
        self._conv_bias = conv_bias
        self._conv_layers = []
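
Each entry of conv_layers is a [dim, kernel, stride] triple. As a point of reference (an assumption about the intended input, based on the published wav2vec 2.0 base configuration rather than on this file):

    # Seven blocks of 512 channels: kernel 10 / stride 5, then four 3/2
    # blocks and two 2/2 blocks, for an overall hop of 5 * 2**6 = 320 samples.
    conv_layers = [[512, 10, 5]] + [[512, 3, 2]] * 4 + [[512, 2, 2]] * 2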
Example #5
    def __init__(self,
                 dim,
                 kernel,
                 stride,
                 dropout_rate=0.,
                 use_bias=False,
                 norm_type=None,
                 name=None):
        """

        Args:
            dim: The output dimension of this convolution layer.
            kernel: The kernel size.
            stride: The stride.
            dropout_rate: The dropout rate.
            use_bias: Whether to include bias in conv encoder.
            norm_type: The type of layer normalization, "layer" or "group" or None.
            name: The name of this layer.
        """
        self._params = extract_constructor_params(locals(), verbose=False)
        super(Wav2vec2ConvBlock, self).__init__(name=name)
        self._dim = dim
        self._kernel = kernel
        self._stride = stride
        self._dropout_rate = dropout_rate
        self._use_bias = use_bias
        self._norm_type = norm_type
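
Here norm_type is only stored as a string. One plausible mapping from that string to a concrete module, written with torch for consistency with the other examples (the construction actually used inside Wav2vec2ConvBlock is an assumption):

    import torch.nn as nn

    def build_norm(norm_type, dim):
        # "layer": layer normalization over the channel dimension.
        if norm_type == "layer":
            return nn.LayerNorm(dim)
        # "group": a group norm with `dim` groups, matching the "default"
        # mode described in the Wav2vec2FeatureExtractor docstring above.
        if norm_type == "group":
            return nn.GroupNorm(num_groups=dim, num_channels=dim)
        return None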
Example #6
    def __init__(self,
                 vocab_path=None,
                 tokens=None,
                 max_len=0,
                 lowercase=False,
                 bos_token="<SEQ_BEG>",
                 eos_token="<SEQ_END>",
                 unk_token="<UNK>",
                 delimiter=" ",
                 reverse=False):
        """ Initialize SymbolsMapper

        Args:
            vocab_path: The path to the vocabulary file. Only one of `vocab_path` and `tokens` should be provided.
            tokens: The word tokens. Only one of `vocab_path` and `tokens` should be provided.
            max_len: The maximum sequence length. Sequences longer than this will be truncated.
            lowercase: A bool, whether to lowercase the word tokens.
            bos_token: The begin-of-sentence token.
            eos_token: The end-of-sentence token.
            unk_token: The token indicating an unknown word.
            delimiter: The delimiter between word tokens.
            reverse: A bool, whether to reverse the sequence.
        """
        if not ((vocab_path is None) ^ (tokens is None)):
            raise ValueError("Either `vocab_path` or `tokens` should be provided.")
        this_locals = copy.copy(locals())
        if tokens is None:
            with tf.io.gfile.GFile(vocab_path, "r") as fp:
                tokens = [line.strip() for line in fp]
            this_locals["tokens"] = tokens
            this_locals["vocab_path"] = None
        self._params = extract_constructor_params(this_locals, verbose=False)
        # extract tokens
        cleaned_tokens = []
        for t in tokens:
            t = t.strip()
            if ((t.startswith("'") and t.endswith("'"))
                or (t.startswith('"') and t.endswith('"'))):
                word = t[1:-1]
            else:
                word = t.strip().split()[0].strip()
            if word:
                cleaned_tokens.append(word)
        assert unk_token, "must provide `unk_token`"
        extra_tokens = [unk_token]
        # add bos
        assert bos_token != unk_token
        extra_tokens.append(bos_token)
        # add eos
        assert eos_token != unk_token and eos_token != bos_token
        while eos_token in cleaned_tokens:
            eos_token += str(random.randint(0, 9))
        extra_tokens.append(eos_token)
        self.vocab = Vocab(tokens=cleaned_tokens, extra_tokens=extra_tokens,
                           lowercase=lowercase)
        self.max_len = max_len
        self.eos_id = self.vocab.map_token_to_id(eos_token)
        self.bos_id = self.vocab.map_token_to_id(bos_token)
        self.unk_id = self.vocab.map_token_to_id(unk_token)
        self.reverse = reverse
        self.delimiter = delimiter
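
The cleaning loop above accepts vocabulary lines either as quoted tokens or as `word count` pairs, keeping only the word. A small illustration of the rule it implements:

    lines = ["'the'", '"a"', "dog 1042", "  cat  7  "]
    cleaned = []
    for t in lines:
        t = t.strip()
        if ((t.startswith("'") and t.endswith("'"))
                or (t.startswith('"') and t.endswith('"'))):
            word = t[1:-1]
        else:
            word = t.split()[0]
        if word:
            cleaned.append(word)
    assert cleaned == ["the", "a", "dog", "cat"]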
Example #7
    def __init__(self,
                 input_depth,
                 num_heads,
                 num_units,
                 attention_key_depth=None,
                 attention_value_depth=None,
                 output_depth=None,
                 attention_dropout_rate=0.1,
                 attention_type="dot_product"):
        """ Initializes the multi head attention layer.

        Args:
            input_depth: The dimension of the input tensor.
            num_heads: An int scalar, the number of heads.
            num_units: An int scalar, the default number of units if the
                other `depth` arguments are not provided.
            attention_key_depth: An int scalar, the dimension of the projected
                attention keys. If not provided, `num_units` is used.
            attention_value_depth: An int scalar, the dimension of the projected
                attention values. If not provided, `num_units` is used.
            output_depth: An int scalar, the dimension of the projected
                outputs. If not provided, `num_units` is used.
            attention_dropout_rate: A float scalar, the dropout rate for the
                attention weights.
            attention_type: A string indicating the attention type.
        """
        self._params = extract_constructor_params(locals(), verbose=False)
        super(MultiHeadAttention, self).__init__()
        self._input_depth = input_depth
        self._num_heads = num_heads
        self._num_units = num_units
        self._attention_key_depth = attention_key_depth or num_units
        self._attention_value_depth = attention_value_depth or num_units
        self._output_depth = output_depth or num_units
        self._attention_dropout_rate = attention_dropout_rate
        self._attention_type = attention_type
        if self._attention_key_depth % self._num_heads != 0:
            raise ValueError(
                "query depth ({}) must be divisible by the number of "
                "attention heads ({}).".format(self._attention_key_depth,
                                               self._num_heads))
        if self._attention_value_depth % self._num_heads != 0:
            raise ValueError(
                "value depth ({}) must be divisible by the number of "
                "attention heads ({}).".format(self._attention_value_depth,
                                               self._num_heads))
        # pre-create output transform layer
        self._output_transform_layer = MultiHeadDenseLayer(
            input_size=input_depth,
            output_units=self._output_depth,
            num_heads=self._num_heads,
            is_output_transform=True,
            use_bias=True)
        self._build_qkv_transform_layer()
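
Both projected depths must divide evenly by the head count, so num_units is normally a multiple of num_heads. A hypothetical construction using only the signature shown above:

    attention = MultiHeadAttention(
        input_depth=512,
        num_heads=8,
        num_units=512)  # key, value and output depths all default to 512
    # 512 % 8 == 0, so each head attends over 512 / 8 = 64 dimensions.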
Example #8
    def __init__(self,
                 embedding_dim,
                 input_dimension=80,
                 input_channels=1,
                 channels=256,
                 kernel_size=3,
                 strides=2,
                 layer_norm=True,
                 name=None):
        """ Initializes the layer for subsample the audio feature.

        Args:
            embedding_dim: An int scalar, the embedding dimension.
            input_dimension: An int scalar, the dimension of the audio feature.
            input_channels: An int scalar, the number of input channels of the audio feature.
            channels: The channel size of the convolution layer.
            kernel_size: The kernel size of the convolution layer.
            strides: The stride size of the convolution layer.
            layer_norm: Whether to apply layer normalization.
            name: The name of the layer.
        """
        self._params = extract_constructor_params(locals(), verbose=True)
        super(AudioConvSubsamplingLayer, self).__init__()
        self._input_channels = input_channels
        self._embedding_dim = embedding_dim
        self._channels = channels
        self._kernel_size = kernel_size
        self._layer_norm = layer_norm
        self._strides = strides
        num_pad = self._kernel_size // 2
        self._conv_layer1 = nn.Conv2d(self._input_channels,
                                      self._channels,
                                      kernel_size=(self._kernel_size,
                                                   self._kernel_size),
                                      stride=(self._strides, self._strides),
                                      padding=(num_pad, num_pad))
        self._conv_layer2 = nn.Conv2d(self._channels,
                                      self._channels,
                                      kernel_size=(self._kernel_size,
                                                   self._kernel_size),
                                      stride=(self._strides, self._strides),
                                      padding=(num_pad, num_pad))
        if self._layer_norm:
            self._norm_layer1 = LayerNorm(self._channels, eps=1e-6)
            self._norm_layer2 = LayerNorm(self._channels, eps=1e-6)
        self._dense_layer = nn.Linear(
            ((input_dimension + self._strides - 1) // self._strides +
             self._strides - 1) // self._strides * self._channels,
            self._embedding_dim,
            bias=True)
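
The input size of the final dense layer folds two rounds of ceiling division by strides into a single expression. With the defaults (input_dimension=80, strides=2, channels=256) it works out as follows:

    input_dimension, strides, channels = 80, 2, 256
    freq = (input_dimension + strides - 1) // strides  # 80 -> 40 after conv 1
    freq = (freq + strides - 1) // strides             # 40 -> 20 after conv 2
    dense_in = freq * channels                         # 20 * 256 = 5120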
Example #9
    def __init__(self,
                 embedding_dim,
                 vocab_size,
                 max_positions,
                 token_types,
                 dropout_rate=0.0,
                 epsilon=1e-12,
                 name=None):
        """ Initializes the BERT embedding layer.

        Args:
            embedding_dim: An int scalar, the embedding dimension.
            vocab_size: An int scalar, the size of vocabulary.
            max_positions: An int scalar, the maximum number of positions.
            token_types: An int scalar, the number of token types.
            dropout_rate: A float scalar, the dropout rate.
            epsilon: A float scalar, the epsilon for layer normalization.
            name: The name of the layer.
        """
        self._params = extract_constructor_params(locals(), verbose=False)
        super(BertEmbedding, self).__init__(name=name)
        self._embedding_dim = embedding_dim
        self._vocab_size = vocab_size
        self._max_positions = max_positions
        self._token_types = token_types
        self._dropout_rate = dropout_rate
        self._epsilon = epsilon
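
BertEmbedding stores the three table sizes that BERT-style models combine: word, position, and token-type embeddings, followed by layer normalization with the given epsilon and dropout. A hedged sketch of that standard combination in torch (the class's actual forward pass is not shown in the source, and these attribute names are hypothetical):

    import torch
    import torch.nn as nn

    class BertEmbeddingSketch(nn.Module):
        def __init__(self, embedding_dim, vocab_size, max_positions,
                     token_types, dropout_rate=0.0, epsilon=1e-12):
            super().__init__()
            self.word = nn.Embedding(vocab_size, embedding_dim)
            self.position = nn.Embedding(max_positions, embedding_dim)
            self.token_type = nn.Embedding(token_types, embedding_dim)
            self.norm = nn.LayerNorm(embedding_dim, eps=epsilon)
            self.dropout = nn.Dropout(dropout_rate)

        def forward(self, token_ids, type_ids):
            positions = torch.arange(token_ids.size(1), device=token_ids.device)
            summed = (self.word(token_ids) + self.position(positions)
                      + self.token_type(type_ids))
            return self.dropout(self.norm(summed))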
Example #10
    def __init__(self, name=None, **kwargs):
        """ Initializes the parameters of the decoder. """
        self._params = extract_constructor_params(locals(), verbose=False)
        super(Decoder, self).__init__(name=name)
Example #11
    def __init__(self, **kwargs):
        """ Initializes the parameters of the encoders. """
        self._params = extract_constructor_params(locals(), verbose=False)
        super(Encoder, self).__init__()
Example #12
    def __init__(self, **kwargs):
        self._params = extract_constructor_params(locals(), verbose=False)
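
All twelve examples share the same pattern: snapshot the constructor arguments before (or right after) the parent initializer runs. One plausible payoff, assuming a get_config-style API that is not shown in these excerpts, is cheap serialization and re-instantiation:

    def get_config(self):
        # Hypothetical accessor: the captured arguments double as the
        # layer's serializable configuration.
        return dict(self._params)

    # rebuilt = SomeLayerClass(**layer.get_config())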