def _predict_modules(
    self,
    vocab_size,
    pred_n_hidden,
    pred_rnn_layers,
    forget_gate_bias,
    t_max,
    norm,
    weights_init_scale,
    hidden_hidden_bias_scale,
    dropout,
    rnn_hidden_size,
):
    """
    Prepare the trainable parameters of the Prediction Network.

    Args:
        vocab_size: Vocab size (excluding the blank token).
        pred_n_hidden: Hidden size of the RNNs.
        pred_rnn_layers: Number of RNN layers.
        forget_gate_bias: Value used to initialize the forget gate bias; 1.0 gives
            unit forget gate bias.
        t_max: If set, performs Chrono LSTM initialization using this value as the
            maximum expected sequence length.
        norm: Type of normalization to perform in the RNN.
        weights_init_scale: Float scale of the weights after initialization. Setting
            to lower than one sometimes helps reduce variance between runs.
        hidden_hidden_bias_scale: Float scale for the hidden-to-hidden bias. Set to
            0.0 for the default behaviour.
        dropout: Dropout probability to apply to the RNN.
        rnn_hidden_size: Hidden size of the RNN; if not set (<= 0), pred_n_hidden
            is used instead.
    """
    if self.blank_as_pad:
        # Reserve one extra embedding row for the blank token and register it as the
        # padding index, so the blank symbol maps to a zero embedding.
        embed = torch.nn.Embedding(vocab_size + 1, pred_n_hidden, padding_idx=self.blank_idx)
    else:
        embed = torch.nn.Embedding(vocab_size, pred_n_hidden)

    layers = torch.nn.ModuleDict(
        {
            "embed": embed,
            "dec_rnn": rnn.rnn(
                input_size=pred_n_hidden,
                hidden_size=rnn_hidden_size if rnn_hidden_size > 0 else pred_n_hidden,
                num_layers=pred_rnn_layers,
                norm=norm,
                forget_gate_bias=forget_gate_bias,
                t_max=t_max,
                dropout=dropout,
                weights_init_scale=weights_init_scale,
                hidden_hidden_bias_scale=hidden_hidden_bias_scale,
                # Project back down to pred_n_hidden only when the RNN hidden size
                # exceeds the prediction network hidden size.
                proj_size=pred_n_hidden if pred_n_hidden < rnn_hidden_size else 0,
            ),
        }
    )
    return layers
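# Illustrative usage sketch (kept as comments so class definition is unaffected):
# a minimal example of driving the returned ModuleDict, assuming a decoder
# instance `decoder` of the enclosing class with `blank_as_pad=True` and
# `blank_idx=28`, and with `torch` and the local `rnn` helper imported at the
# top of this file. All concrete values below are hypothetical.
#
#   layers = decoder._predict_modules(
#       vocab_size=28, pred_n_hidden=320, pred_rnn_layers=2,
#       forget_gate_bias=1.0, t_max=None, norm=None,
#       weights_init_scale=1.0, hidden_hidden_bias_scale=0.0,
#       dropout=0.1, rnn_hidden_size=-1,  # <= 0: hidden size falls back to pred_n_hidden
#   )
#   tokens = torch.full((4, 1), 28, dtype=torch.long)       # batch of blank tokens
#   y = layers["embed"](tokens)                              # (B, U, pred_n_hidden)
#   g, state = layers["dec_rnn"](y.transpose(0, 1), None)   # time-major: (U, B, H)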
def _predict(self, vocab_size, pred_n_hidden, pred_rnn_layers, forget_gate_bias, t_max, norm, dropout):
    """
    Prepare the trainable parameters of the Prediction Network.

    Args:
        vocab_size: Vocab size (excluding the blank token).
        pred_n_hidden: Hidden size of the RNNs.
        pred_rnn_layers: Number of RNN layers.
        forget_gate_bias: Value used to initialize the forget gate bias; 1.0 gives
            unit forget gate bias.
        t_max: If set, performs Chrono LSTM initialization using this value as the
            maximum expected sequence length.
        norm: Type of normalization to perform in the RNN.
        dropout: Dropout probability to apply to the RNN.
    """
    if self.blank_as_pad:
        # As above: one extra embedding row for the blank token, used as the padding index.
        embed = torch.nn.Embedding(vocab_size + 1, pred_n_hidden, padding_idx=self.blank_idx)
    else:
        embed = torch.nn.Embedding(vocab_size, pred_n_hidden)

    layers = torch.nn.ModuleDict(
        {
            "embed": embed,
            "dec_rnn": rnn.rnn(
                input_size=pred_n_hidden,
                hidden_size=pred_n_hidden,
                num_layers=pred_rnn_layers,
                norm=norm,
                forget_gate_bias=forget_gate_bias,
                t_max=t_max,
                dropout=dropout,
            ),
        }
    )
    return layers
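# Relationship between the two builders (an illustrative sketch, assuming the
# rnn.rnn factory defaults to weights_init_scale=1.0 and
# hidden_hidden_bias_scale=0.0): the older `_predict` is equivalent to calling
# `_predict_modules` with the output projection disabled, e.g.
#
#   layers = decoder._predict_modules(
#       vocab_size=vocab_size, pred_n_hidden=pred_n_hidden,
#       pred_rnn_layers=pred_rnn_layers, forget_gate_bias=forget_gate_bias,
#       t_max=t_max, norm=norm, dropout=dropout,
#       weights_init_scale=1.0, hidden_hidden_bias_scale=0.0,
#       rnn_hidden_size=-1,  # <= 0: hidden size is pred_n_hidden, so proj_size=0
#   )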