def default_hparams():
    """Returns a dictionary of hyperparameters with default values.

    See :class:`~texar.tf.modules.decoders.transformer_decoders.TransformerDecoder`
    for details.
    """
    return {
        "num_blocks": 6,
        "dim": 512,
        "embedding_tie": True,
        "output_layer_bias": False,
        "max_decoding_length": int(1e10),
        "embedding_dropout": 0.1,
        "residual_dropout": 0.1,
        "poswise_feedforward": default_transformer_poswise_net_hparams(),
        "graph_multihead_attention": {
            "name": "graph_multihead_attention",
            "num_units": 512,
            "num_heads": 8,
            "dropout_rate": 0.1,
            "output_dim": 512,
            "use_bias": False,
        },
        "initializer": None,
        "name": "cross_graph_transformer_sequential_decoder",
    }
def default_hparams():
    """Returns a dictionary of hyperparameters with default values.

    See :class:`~texar.tf.modules.encoders.transformer_encoders.TransformerEncoder`
    for the shared hyperparameters; the decoder-specific entries
    (`embedding_tie`, `output_layer_bias`, `max_decoding_length`) follow
    :class:`~texar.tf.modules.decoders.transformer_decoders.TransformerDecoder`.
    """
    return {
        "num_blocks": 6,
        "dim": 512,
        "use_bert_config": False,
        "use_adj": False,
        "embedding_dropout": 0.1,
        "residual_dropout": 0.1,
        "poswise_feedforward": default_transformer_poswise_net_hparams(),
        "graph_multihead_attention": {
            "name": "graph_multihead_attention",
            "num_units": 512,
            "num_heads": 8,
            "dropout_rate": 0.1,
            "output_dim": 512,
            "use_bias": False,
        },
        "initializer": None,
        "name": "cross_graph_transformer_fixed_length_decoder",
        "embedding_tie": True,
        "output_layer_bias": False,
        "max_decoding_length": int(1e10),
    }
def default_hparams():
    """Returns a dictionary of hyperparameters with default values.

    .. code-block:: python

        {
            # Same as in TransformerEncoder
            "num_blocks": 6,
            "dim": 512,
            "embedding_dropout": 0.1,
            "residual_dropout": 0.1,
            "poswise_feedforward": default_transformer_poswise_net_hparams(),
            "multihead_attention": {
                "name": "multihead_attention",
                "num_units": 512,
                "num_heads": 8,
                "dropout_rate": 0.1,
                "output_dim": 512,
                "use_bias": False,
            },
            "initializer": None,
            "name": "transformer_decoder",

            # Additional for TransformerDecoder
            "embedding_tie": True,
            "output_layer_bias": False,
            "max_decoding_length": int(1e10),
        }

    Here:

    "num_blocks": int
        Number of stacked blocks.

    "dim": int
        Hidden dimension of the decoder.

    "embedding_dropout": float
        Dropout rate of the input word and position embeddings.

    "residual_dropout": float
        Dropout rate of the residual connections.

    "poswise_feedforward": dict
        Hyperparameters for a feed-forward network used in residual
        connections. Make sure the dimension of the output tensor is
        equal to `dim`.
        See :func:`~texar.tf.modules.default_transformer_poswise_net_hparams`
        for details.

    "multihead_attention": dict
        Hyperparameters for the multihead attention strategy. Make sure
        the `output_dim` in this module is equal to `dim`.
        See :func:`~texar.tf.modules.MultiheadAttentionEncoder.default_hparams`
        for details.

    "initializer": dict, optional
        Hyperparameters of the default initializer that initializes
        variables created in this module.
        See :func:`~texar.tf.core.get_initializer` for details.

    "embedding_tie": bool
        Whether to tie the input word embedding with the output layer,
        i.e., reuse the embedding matrix as the output projection
        instead of creating a separate dense layer.

    "output_layer_bias": bool
        Whether to add a bias term to the output layer. Used only if
        :attr:`output_layer` is `None` when constructing the class
        instance.

    "max_decoding_length": int
        The maximum allowed number of decoding steps. Set to a very
        large number to avoid the length constraint. Ignored if
        provided in :meth:`_build` or "train_greedy" decoding is used.

    "name": str
        Name of the module.
    """
    return {
        "num_blocks": 6,
        "dim": 512,
        "embedding_tie": True,
        "output_layer_bias": False,
        "max_decoding_length": int(1e10),
        "embedding_dropout": 0.1,
        "residual_dropout": 0.1,
        "poswise_feedforward": default_transformer_poswise_net_hparams(),
        "multihead_attention": {
            "name": "multihead_attention",
            "num_units": 512,
            "num_heads": 8,
            "dropout_rate": 0.1,
            "output_dim": 512,
            "use_bias": False,
        },
        "initializer": None,
        "name": "transformer_decoder",
    }
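
# A minimal usage sketch (illustrative only, assuming texar's ``HParams``
# utility from ``texar.tf``): default dictionaries like the ones above are
# typically wrapped in ``HParams``, which merges user overrides into the
# defaults, including nested dictionaries. The override values below are
# arbitrary examples, not recommended settings.
if __name__ == "__main__":
    from texar.tf import HParams

    overrides = {
        "num_blocks": 4,                          # shallower stack
        "multihead_attention": {"num_heads": 4},  # nested override
    }
    hparams = HParams(overrides, default_hparams())
    assert hparams.num_blocks == 4    # overridden value
    assert hparams.dim == 512         # default value retained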