Example #1
def _build_dense_output_layer(cell_output_size: int,
                              hparams: HParams) -> Optional[nn.Sequential]:
    r"""Build the output layers.

    Args:
        cell_output_size: The output size of the RNN cell.
        hparams (dict or HParams): Hyperparameters. Missing hyperparameters
            will be set to default values. See
            :meth:`default_hparams` for the hyperparameter structure and
            default values.

    Returns:
        A :torch_nn:`Sequential` module containing the output layers, or
        `None` if `num_layers <= 0`.
    """
    nlayers = hparams.num_layers

    if nlayers <= 0:
        return None

    layer_size = _to_list(hparams.layer_size, 'output_layer.layer_size',
                          nlayers)

    dropout_layer_ids = _to_list(hparams.dropout_layer_ids)

    other_kwargs = hparams.other_dense_kwargs or {}
    if isinstance(other_kwargs, HParams):
        other_kwargs = other_kwargs.todict()
    if not isinstance(other_kwargs, dict):
        raise ValueError(
            "hparams 'output_layer.other_dense_kwargs' must be a dict.")

    output_layers: List[nn.Module] = []
    for i in range(nlayers):
        if i in dropout_layer_ids:
            # TODO: Variational dropout is not implemented.
            output_layers.append(nn.Dropout(p=hparams.dropout_rate))

        dense_layer = nn.Linear(
            in_features=(cell_output_size if i == 0 else layer_size[i - 1]),
            out_features=layer_size[i],
            **other_kwargs)

        output_layers.append(dense_layer)

        if i == nlayers - 1:
            activation = hparams.final_layer_activation
        else:
            activation = hparams.activation

        if activation is not None:
            layer_hparams = {"type": activation, "kwargs": {}}
            activation_layer = layers.get_layer(hparams=layer_hparams)
            output_layers.append(activation_layer)

    if nlayers in dropout_layer_ids:
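        # A dropout id equal to `num_layers` applies dropout to the final
        # output.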
        output_layers.append(nn.Dropout(p=hparams.dropout_rate))

    return nn.Sequential(*output_layers)
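
For reference, a minimal sketch of the kind of module this helper assembles,
built directly with torch.nn; the sizes, dropout rate, and activations below
are illustrative values, not library defaults:

import torch
from torch import nn

# Equivalent stack for: cell_output_size=128, num_layers=2,
# layer_size=[64, 32], dropout_layer_ids=[0], dropout_rate=0.5,
# activation="ReLU", final_layer_activation=None.
output_layers = nn.Sequential(
    nn.Dropout(p=0.5),   # dropout before layer 0 (id 0 in dropout_layer_ids)
    nn.Linear(128, 64),  # layer 0: cell_output_size -> layer_size[0]
    nn.ReLU(),           # intermediate activation
    nn.Linear(64, 32),   # layer 1: layer_size[0] -> layer_size[1]
)                        # no final activation, no trailing dropout

x = torch.randn(8, 20, 128)  # [batch_size, max_time, cell_output_size]
y = output_layers(x)         # -> shape [8, 20, 32]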
Example #2
def _forward_output_layers(inputs,
                           input_size,
                           output_layer,
                           time_major,
                           hparams,
                           mode,
                           sequence_length=None):
    """Forwards inputs through the output layers.

    Args:
        inputs: A Tensor of shape `[batch_size, max_time] + input_size` if
            :attr:`time_major=False`, or shape
            `[max_time, batch_size] + input_size` if :attr:`time_major=True`.
        input_size: An `int` or 1D `int` array, the feature size of
            :attr:`inputs`.
        output_layer: A single layer, or a list of layers built from
            :attr:`hparams`, to apply to :attr:`inputs`. If `None`,
            :attr:`inputs` is returned unchanged.
        time_major: Whether :attr:`inputs` is time-major.
        hparams: The `output_layer` hyperparameters, or `None` if
            :attr:`output_layer` was passed to the constructor directly.
        mode: Used to determine whether dropout is applied in training mode.
        sequence_length (optional): An `int` Tensor of sequence lengths;
            time steps beyond the respective lengths are masked to zero.

    Returns:
        A pair :attr:`(outputs, outputs_size)`, where

        - :attr:`outputs`: A Tensor of shape \
          `[batch_size, max_time] + outputs_size`, or \
          `[max_time, batch_size] + outputs_size` if :attr:`time_major=True`.

        - :attr:`outputs_size`: An `int` or 1D `int` array representing the \
          output size.
    """
    if output_layer is None:
        return inputs, input_size

    if hparams is None:
        # output_layer was passed in from the constructor
        if isinstance(output_layer, (list, tuple)):
            raise ValueError('output_layer must not be a list or tuple.')
        output, output_size = _forward_single_output_layer(
            inputs, input_size, output_layer)
    else:
        # output_layer was built based on hparams
        output_layer = _to_list(output_layer)

        dropout_layer_ids = _to_list(hparams.dropout_layer_ids)
        if len(dropout_layer_ids) > 0:
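            # `training` is needed only when dropout layers are configured.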
            training = is_train_mode(mode)

        output = inputs
        output_size = input_size
        for i, layer in enumerate(output_layer):
            if i in dropout_layer_ids:
                output = _apply_dropout(output, time_major, hparams, training)
            output, output_size = _forward_single_output_layer(
                output, output_size, layer)

        if len(output_layer) in dropout_layer_ids:
            output = _apply_dropout(output, time_major, hparams, training)

    if sequence_length is not None:
        output = mask_sequences(output,
                                sequence_length,
                                time_major=time_major,
                                tensor_rank=3)

    return output, output_size
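
To make the final masking step concrete, here is a standalone NumPy sketch of
what masking by sequence_length does for batch-major, rank-3 output (shapes
and values are illustrative; Texar's mask_sequences handles this internally):

import numpy as np

output = np.random.randn(2, 5, 4)   # [batch_size, max_time, dim]
sequence_length = np.array([3, 5])  # valid steps per sequence

steps = np.arange(output.shape[1])                # [max_time]
mask = steps[None, :] < sequence_length[:, None]  # [batch, max_time] bool
masked = output * mask[:, :, None]  # steps 3 and 4 of sample 0 become 0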
Example #3
def _build_dense_output_layer(hparams):
    """Builds the Dense output layers based on :attr:`hparams`.

    Returns `None` if `num_layers <= 0`, a single layer if there is
    exactly one, and a list of layers otherwise.
    """
    nlayers = hparams.num_layers

    if nlayers <= 0:
        return None

    layer_size = _to_list(
        hparams.layer_size, 'output_layer.layer_size', nlayers)

    other_kwargs = hparams.other_dense_kwargs or {}
    if isinstance(other_kwargs, HParams):
        other_kwargs = other_kwargs.todict()
    if not isinstance(other_kwargs, dict):
        raise ValueError(
            "hparams 'output_layer.other_dense_kwargs' must be a dict.")

    dense_layers = []
    for i in range(nlayers):
        if i == nlayers - 1:
            activation = hparams.final_layer_activation
        else:
            activation = hparams.activation

        kwargs_i = {"units": layer_size[i],
                    "activation": activation,
                    "name": "dense_%d" % (i+1)}
        kwargs_i.update(other_kwargs)

        layer_hparams = {"type": "Dense", "kwargs": kwargs_i}
        dense_layers.append(layers.get_layer(hparams=layer_hparams))

    if len(dense_layers) == 1:
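        # Return the single layer directly rather than a one-element list.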
        dense_layers = dense_layers[0]

    return dense_layers
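
A minimal sketch of the stack this version builds, written with
tf.keras.layers.Dense directly (assuming TensorFlow 2.x; the kwargs mirror
kwargs_i above, and the sizes and activations are illustrative):

import tensorflow as tf

# Equivalent layers for: num_layers=2, layer_size=[64, 32],
# activation="relu", final_layer_activation=None.
dense_layers = [
    tf.keras.layers.Dense(units=64, activation="relu", name="dense_1"),
    tf.keras.layers.Dense(units=32, activation=None, name="dense_2"),
]

x = tf.random.normal([8, 128])   # [batch_size, input_dim]
for layer in dense_layers:
    x = layer(x)                 # final shape: [8, 32]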