Exemple #1
0
    def test_get_embedding(self):
        """Tests :func:`~texar.modules.embedder.embedder_utils.get_embedding`.

        Builds an embedding twice -- once with no hyperparameters given and
        once with a custom uniform initializer -- and checks in both cases
        that the result has one row per requested embedding and as many
        columns as the default ``"dim"`` hyperparameter.
        """
        num_rows = 100

        # Case 1: no hyperparameters supplied.
        default_emb = embedder_utils.get_embedding(num_embeds=num_rows)
        self.assertEqual(default_emb.shape[0], num_rows)
        expected_dim = embedder_utils.default_embedding_hparams()["dim"]
        self.assertEqual(default_emb.shape[1], expected_dim)

        # Case 2: only the initializer is overridden; "dim" is not set, and
        # the test expects the default dimension to be used.
        uniform_init = {
            "type": "torch.nn.init.uniform_",
            "kwargs": {'a': -0.1, 'b': 0.1},
        }
        custom_emb = embedder_utils.get_embedding(
            hparams={"initializer": uniform_init},
            num_embeds=num_rows,
        )
        self.assertEqual(custom_emb.shape[0], num_rows)
        expected_dim = embedder_utils.default_embedding_hparams()["dim"]
        self.assertEqual(custom_emb.shape[1], expected_dim)
Exemple #2
0
    def default_hparams():
        """Returns the default hyperparameters as a dictionary.

        The dictionary has the following structure and values:

        .. code-block:: python

            {
                "dim": 100,
                "initializer": {
                    "type": "random_uniform_initializer",
                    "kwargs": {
                        "minval": -0.1,
                        "maxval": 0.1,
                        "seed": None
                    }
                },
                "regularizer": {
                    "type": "L1L2",
                    "kwargs": {
                        "l1": 0.,
                        "l2": 0.
                    }
                },
                "dropout_rate": 0,
                "trainable": True,
                "name": "position_embedder"
            }

        Each hyperparameter has the same meaning as its counterpart in
        :meth:`texar.modules.WordEmbedder.default_hparams`.
        """
        # Start from the shared embedding defaults; only the name is set here.
        defaults = embedder_utils.default_embedding_hparams()
        defaults["name"] = "position_embedder"
        return defaults
Exemple #3
0
    def default_hparams():
        r"""Returns the default hyperparameters as a dictionary.

        .. code-block:: python

            {
                "dim": 100,
                "dropout_rate": 0,
                "dropout_strategy": 'element',
                "initializer": {
                    "type": "random_uniform_initializer",
                    "kwargs": {
                        "minval": -0.1,
                        "maxval": 0.1,
                        "seed": None
                    }
                },
                "name": "word_embedder",
            }

        The hyperparameters are:

        `"dim"`: int or list
            Dimension of the embedding. A list of integers may be given to
            produce embeddings with dimensionality > 1.

            Has no effect when :attr:`init_value` is passed to the embedder
            constructor.

        `"dropout_rate"`: float
            Dropout rate, between 0 and 1. For example, ``dropout_rate=0.1``
            drops out 10% of the embedding. Use 0 to turn dropout off.

        `"dropout_strategy"`: str
            How dropout is applied. One of:

            - ``"element"``: The regular strategy; individual elements of
              the embedding vectors are dropped.
            - ``"item"``: Whole items (e.g., words) are dropped individually.
              E.g., for the word sequence "the simpler the better", the
              strategy can yield "_ simpler the better", where the first
              "the" is dropped.
            - ``"item_type"``: Item types (e.g., word types) are dropped.
              E.g., for the above sequence, the strategy can yield
              "_ simpler _ better", where the word type "the" is dropped.
              This strategy never yields "_ simpler the better" as the
              ``"item"`` strategy can.

        `"initializer"`: dict or None
            Hyperparameters of the initializer for the embedding values; see
            :func:`~texar.core.get_initializer` for the details. Has no
            effect when :attr:`init_value` is passed to the embedder
            constructor.

        `"name"`: str
            Name of the embedding variable.
        """
        # TODO Shibiao: add regularizer
        # Start from the shared embedding defaults; only the name is set here.
        word_defaults = embedder_utils.default_embedding_hparams()
        word_defaults["name"] = "word_embedder"
        return word_defaults
Exemple #4
0
    def test_get_embedding(self):
        """Tests :func:`~texar.modules.embedder.embedder_utils.get_embedding`.

        Builds an embedding with no hyperparameters given, then again with
        an initializer object and a regularizer object passed directly as
        the ``"type"`` entries, and checks that both results have
        `vocab_size` rows and the default ``"dim"`` number of columns.
        """
        vocab_size = 100
        emb = embedder_utils.get_embedding(num_embeds=vocab_size)
        # Read the static shape via `as_list()`, which returns plain Python
        # ints. Indexing the shape and reading `.value` only works when the
        # entries are TF1 `Dimension` objects and fails once they are ints.
        emb_shape = emb.shape.as_list()
        self.assertEqual(emb_shape[0], vocab_size)
        self.assertEqual(emb_shape[1],
                         embedder_utils.default_embedding_hparams()["dim"])

        hparams = {
            "initializer": {
                "type": tf.random_uniform_initializer(minval=-0.1, maxval=0.1)
            },
            "regularizer": {
                "type": tf.keras.regularizers.L1L2(0.1, 0.1)
            }
        }
        # A distinct variable scope is passed for the second embedding,
        # presumably to avoid a variable-name clash with the first one.
        emb = embedder_utils.get_embedding(hparams=hparams,
                                           num_embeds=vocab_size,
                                           variable_scope='embedding_2')
        emb_shape = emb.shape.as_list()
        self.assertEqual(emb_shape[0], vocab_size)
        self.assertEqual(emb_shape[1],
                         embedder_utils.default_embedding_hparams()["dim"])
Exemple #5
0
    def default_hparams():
        """Returns the default hyperparameters as a dictionary.

        Returns:
            A dictionary with the structure and values shown below.

            .. code-block:: python

                {
                    "name": "word_embedder",
                    "dim": 100,
                    "initializer": {
                        "type": "random_uniform_initializer",
                        "kwargs": {
                            "minval": -0.1,
                            "maxval": 0.1,
                            "seed": None
                        }
                    },
                    "regularizer": {
                        "type": "L1L2",
                        "kwargs": {
                            "l1": 0.,
                            "l2": 0.
                        }
                    },
                    "dropout_rate": 0,
                    "dropout_strategy": 'element',
                    "trainable": True,
                }

            More details are given in
            :func:`~texar.modules.default_embedding_hparams`.
        """
        # Start from the shared embedding defaults; only the name is set here.
        embedder_defaults = embedder_utils.default_embedding_hparams()
        embedder_defaults["name"] = "word_embedder"
        return embedder_defaults
Exemple #6
0
    def default_hparams():
        """Returns the default hyperparameters as a dictionary.

        .. code-block:: python

            {
                "dim": 100,
                "dropout_rate": 0,
                "dropout_strategy": 'element',
                "trainable": True,
                "initializer": {
                    "type": "random_uniform_initializer",
                    "kwargs": {
                        "minval": -0.1,
                        "maxval": 0.1,
                        "seed": None
                    }
                },
                "regularizer": {
                    "type": "L1L2",
                    "kwargs": {
                        "l1": 0.,
                        "l2": 0.
                    }
                },
                "name": "word_embedder",
            }

        The hyperparameters are:

        "dim" : int or list
            Dimension of the embedding. A list of integers may be given to
            produce embeddings with dimensionality > 1.

            Has no effect when :attr:`init_value` is passed to the embedder
            constructor.

        "dropout_rate" : float
            Dropout rate, between 0 and 1. For example, `dropout_rate=0.1`
            drops out 10% of the embedding. Use 0 to turn dropout off.

        "dropout_strategy" : str
            How dropout is applied. One of:

            - ``"element"``: The regular strategy; individual elements of
              the embedding vectors are dropped.
            - ``"item"``: Whole items (e.g., words) are dropped individually.
              E.g., for the word sequence 'the simpler the better', the
              strategy can yield '_ simpler the better', where the first
              `the` is dropped.
            - ``"item_type"``: Item types (e.g., word types) are dropped.
              E.g., for the above sequence, the strategy can yield
              '_ simpler _ better', where the word type 'the' is dropped.
              This strategy never yields '_ simpler the better' as the
              'item' strategy can.

        "trainable" : bool
            Whether the embedding is trainable.

        "initializer" : dict or None
            Hyperparameters of the initializer for the embedding values; see
            :func:`~texar.core.get_initializer` for the details. Has no
            effect when :attr:`init_value` is passed to the embedder
            constructor.

        "regularizer" : dict
            Hyperparameters of the regularizer for the embedding values; see
            :func:`~texar.core.get_regularizer` for the details.

        "name" : str
            Name of the embedding variable.
        """
        # Start from the shared embedding defaults; only the name is set here.
        base_hparams = embedder_utils.default_embedding_hparams()
        base_hparams["name"] = "word_embedder"
        return base_hparams