Code Example #1
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        See 'texar.tf.modules.decoders.transformer_decoders.TransformerDecoder' for details.
        """
        return {
            "num_blocks": 6,
            "dim": 512,
            "embedding_tie": True,
            "output_layer_bias": False,
            "max_decoding_length": int(1e10),
            "embedding_dropout": 0.1,
            "residual_dropout": 0.1,
            "poswise_feedforward": default_transformer_poswise_net_hparams(),
            "graph_multihead_attention": {
                "name": "graph_multihead_attention",
                "num_units": 512,
                "num_heads": 8,
                "dropout_rate": 0.1,
                "output_dim": 512,
                "use_bias": False,
            },
            "initializer": None,
            "name": "cross_graph_transformer_sequential_decoder",
        }
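
As a usage sketch (not part of the original source), the dictionary returned above is typically wrapped in texar.tf.HParams, which merges user overrides with default_hparams() so that only the keys being changed have to be spelled out:

    # Hypothetical usage sketch. Assumes texar-tf is installed and that
    # default_hparams() is reachable as written here (e.g. as a staticmethod
    # of the surrounding decoder class).
    from texar.tf import HParams

    user_hparams = {
        "num_blocks": 4,                                # shallower stack
        "graph_multihead_attention": {"num_heads": 4},  # override one nested key
    }
    hparams = HParams(user_hparams, default_hparams())

    print(hparams.num_blocks)                           # 4   (overridden)
    print(hparams.graph_multihead_attention.num_heads)  # 4   (overridden)
    print(hparams.dim)                                  # 512 (default kept)
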
Code Example #2
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        See 'texar.tf.modules.encoders.transformer_encoders.TransformerEncoder' for details.
        """
        return {
            'num_blocks': 6,
            'dim': 512,
            'use_bert_config': False,
            'use_adj': False,
            'embedding_dropout': 0.1,
            'residual_dropout': 0.1,
            'poswise_feedforward': default_transformer_poswise_net_hparams(),
            'graph_multihead_attention': {
                'name': 'graph_multihead_attention',
                'num_units': 512,
                'num_heads': 8,
                'dropout_rate': 0.1,
                'output_dim': 512,
                'use_bias': False,
            },
            'initializer': None,
            'name': 'cross_graph_transformer_fixed_length_decoder',
            'embedding_tie': True,
            'output_layer_bias': False,
            'max_decoding_length': int(1e10),
        }
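
The TransformerDecoder docstring in Code Example #3 below requires the attention output_dim and the feed-forward output size to match dim; the same constraint presumably applies to the graph_multihead_attention block here. A small hypothetical helper (not part of the original module) that asserts this on the defaults above might look like:

    # Hypothetical consistency check, not part of the original code: the
    # attention output feeds a residual connection, so its output_dim must
    # equal the model width "dim" declared at the top level.
    def check_hparams(hparams):
        attn_dim = hparams["graph_multihead_attention"]["output_dim"]
        if attn_dim != hparams["dim"]:
            raise ValueError(
                "graph_multihead_attention.output_dim (%d) must equal "
                "dim (%d)" % (attn_dim, hparams["dim"]))
        return hparams

    check_hparams(default_hparams())   # the defaults above pass: 512 == 512
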
Code Example #3
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                # Same as in TransformerEncoder
                "num_blocks": 6,
                "dim": 512,
                "embedding_dropout": 0.1,
                "residual_dropout": 0.1,
                "poswise_feedforward": default_transformer_poswise_net_hparams,
                "multihead_attention": {
                    'name': 'multihead_attention',
                    'num_units': 512,
                    'output_dim': 512,
                    'num_heads': 8,
                    'dropout_rate': 0.1,
                    'output_dim': 512,
                    'use_bias': False,
                },
                "initializer": None,
                "name": "transformer_decoder"
                # Additional for TransformerDecoder
                "embedding_tie": True,
                "output_layer_bias": False,
                "max_decoding_length": int(1e10),
            }

        Here:

        "num_blocks": int
            Number of stacked blocks.

        "dim": int
            Hidden dimension of the decoder.

        "embedding_dropout": float
            Dropout rate of the input word and position embeddings.

        "residual_dropout":  float
            Dropout rate of the residual connections.

        "poswise_feedforward": dict
            Hyperparameters for a feed-forward network used in residual
            connections.
            Make sure the dimension of the output tensor is equal to `dim`.

            See :func:`~texar.tf.modules.default_transformer_poswise_net_hparams`
            for details.

        "multihead_attention": dict
            Hyperparameters for the multihead attention strategy.
            Make sure the `output_dim` in this module is equal to `dim`.

            See :func:`~texar.tf.modules.MultiheadAttentionEncoder.default_hparams`
            for details.

        "initializer": dict, optional
            Hyperparameters of the default initializer that initializes
            variables created in this module.
            See :func:`~texar.tf.core.get_initializer` for details.

        "output_layer_bias": bool
            Whether to add a bias term to the output layer.
            Used only if :attr:`output_layer` is `None` when constructing
            the class instance.

        "max_decoding_length": int
            The maximum allowed number of decoding steps.
            Set to a very large number to avoid the length constraint.
            Ignored if "max_decoding_length" is given in :meth:`_build`, or if
            "train_greedy" decoding is used.

        "name": str
            Name of the module.
        """
        return {
            "num_blocks": 6,
            "dim": 512,
            "embedding_tie": True,
            "output_layer_bias": False,
            "max_decoding_length": int(1e10),
            "embedding_dropout": 0.1,
            "residual_dropout": 0.1,
            "poswise_feedforward": default_transformer_poswise_net_hparams(),
            "multihead_attention": {
                "name": "multihead_attention",
                "num_units": 512,
                "num_heads": 8,
                "dropout_rate": 0.1,
                "output_dim": 512,
                "use_bias": False,
            },
            "initializer": None,
            "name": "transformer_decoder",
        }
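
To put these defaults in context, here is a hedged construction sketch. It assumes the texar.tf API, where TransformerDecoder accepts vocab_size and hparams; keys left out of the user dictionary fall back to the defaults documented above:

    # Hypothetical construction sketch; assumes texar-tf is installed and that
    # texar.tf.modules.TransformerDecoder(vocab_size=..., hparams=...) is the
    # constructor. Unlisted keys fall back to default_hparams().
    import texar.tf as tx

    decoder = tx.modules.TransformerDecoder(
        vocab_size=30000,
        hparams={
            "num_blocks": 4,
            "multihead_attention": {"num_heads": 4},
            "max_decoding_length": 256,
        })

    print(decoder.hparams.dim)                  # 512, taken from the defaults
    print(decoder.hparams.max_decoding_length)  # 256, as overridden
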