def __init__(self, input_size=1024, context_size=1024, hidden_size=1024,
             num_layers=1, batch_first=False, dropout=0.2,
             init_weight=0.1):
    """
    Constructor for the RecurrentAttention.

    :param input_size: number of features in input tensor
    :param context_size: number of features in output from encoder
    :param hidden_size: internal hidden size
    :param num_layers: number of layers in LSTM
    :param batch_first: if True the model uses (batch, seq, feature)
        tensors, if false the model uses (seq, batch, feature)
    :param dropout: probability of dropout (on input to LSTM layer)
    :param init_weight: range for the uniform initializer
    """
    super(RecurrentAttention, self).__init__()

    self.rnn = nn.LSTM(input_size, hidden_size, num_layers, bias=True,
                       batch_first=batch_first)
    init_lstm_(self.rnn, init_weight)

    self.attn = BahdanauAttention(hidden_size, context_size, context_size,
                                  normalize=True, batch_first=batch_first)

    self.dropout = nn.Dropout(dropout)
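
# A minimal, self-contained sketch (not part of the model) illustrating the
# tensor layout controlled by `batch_first`, which the docstring above
# describes. It uses a plain nn.LSTM with the constructor's default sizes;
# the tensor names and sizes are illustrative assumptions only.
import torch
import torch.nn as nn

seq_len, batch, hidden_size = 7, 4, 1024

# batch_first=False (the default): inputs are (seq, batch, feature)
rnn = nn.LSTM(hidden_size, hidden_size, num_layers=1, bias=True,
              batch_first=False)
x = torch.randn(seq_len, batch, hidden_size)
out, (h, c) = rnn(x)
print(out.shape)  # torch.Size([7, 4, 1024])

# batch_first=True: inputs are (batch, seq, feature)
rnn_bf = nn.LSTM(hidden_size, hidden_size, num_layers=1, bias=True,
                 batch_first=True)
out_bf, _ = rnn_bf(x.transpose(0, 1))
print(out_bf.shape)  # torch.Size([4, 7, 1024])
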
def __init__(self, vocab_size, hidden_size=1024, num_layers=4, dropout=0.2,
             batch_first=False, embedder=None, init_weight=0.1):
    """
    Constructor of the ResidualRecurrentDecoder.

    :param vocab_size: size of vocabulary
    :param hidden_size: hidden size for LSTM layers
    :param num_layers: number of LSTM layers
    :param dropout: probability of dropout (on input to LSTM layers)
    :param batch_first: if True the model uses (batch, seq, feature)
        tensors, if false the model uses (seq, batch, feature)
    :param embedder: instance of nn.Embedding, if None constructor will
        create new embedding layer
    :param init_weight: range for the uniform initializer
    """
    super(ResidualRecurrentDecoder, self).__init__()

    self.num_layers = num_layers

    self.att_rnn = RecurrentAttention(hidden_size, hidden_size,
                                      hidden_size, num_layers=1,
                                      batch_first=batch_first,
                                      dropout=dropout)

    self.rnn_layers = nn.ModuleList()
    for _ in range(num_layers - 1):
        self.rnn_layers.append(
            nn.LSTM(2 * hidden_size, hidden_size, num_layers=1, bias=True,
                    batch_first=batch_first))

    for lstm in self.rnn_layers:
        init_lstm_(lstm, init_weight)

    if embedder is not None:
        self.embedder = embedder
    else:
        self.embedder = nn.Embedding(vocab_size, hidden_size,
                                     padding_idx=config.PAD)
        nn.init.uniform_(self.embedder.weight.data, -init_weight,
                         init_weight)

    self.classifier = Classifier(hidden_size, vocab_size)
    self.dropout = nn.Dropout(p=dropout)
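
# A hedged sketch of the residual wiring the decoder constructor implies:
# each extra LSTM layer consumes the previous layer's output concatenated
# with the attention context (hence the 2 * hidden_size input), and its
# output is added back to its input. The actual forward pass is defined
# elsewhere; the tensor names below are illustrative assumptions.
import torch
import torch.nn as nn

seq_len, batch, hidden_size = 5, 3, 1024
layer = nn.LSTM(2 * hidden_size, hidden_size, num_layers=1, bias=True,
                batch_first=False)

x = torch.randn(seq_len, batch, hidden_size)              # previous layer output
attn_context = torch.randn(seq_len, batch, hidden_size)   # from attention

out, _ = layer(torch.cat((x, attn_context), dim=2))       # (seq, batch, hidden)
x = x + out                                               # residual connection
print(x.shape)  # torch.Size([5, 3, 1024])
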
def __init__(self, vocab_size, hidden_size=1024, num_layers=4, dropout=0.2,
             batch_first=False, embedder=None, init_weight=0.1):
    """
    Constructor for the ResidualRecurrentEncoder.

    :param vocab_size: size of vocabulary
    :param hidden_size: hidden size for LSTM layers
    :param num_layers: number of LSTM layers, 1st layer is bidirectional
    :param dropout: probability of dropout (on input to LSTM layers)
    :param batch_first: if True the model uses (batch, seq, feature)
        tensors, if false the model uses (seq, batch, feature)
    :param embedder: instance of nn.Embedding, if None constructor will
        create new embedding layer
    :param init_weight: range for the uniform initializer
    """
    super(ResidualRecurrentEncoder, self).__init__()
    self.batch_first = batch_first
    self.rnn_layers = nn.ModuleList()

    # 1st LSTM layer, bidirectional
    self.rnn_layers.append(
        EmuBidirLSTM(hidden_size, hidden_size, num_layers=1, bias=True,
                     batch_first=batch_first, bidirectional=True))

    # 2nd LSTM layer, with 2x larger input_size
    self.rnn_layers.append(
        nn.LSTM((2 * hidden_size), hidden_size, num_layers=1, bias=True,
                batch_first=batch_first))

    # Remaining LSTM layers
    for _ in range(num_layers - 2):
        self.rnn_layers.append(
            nn.LSTM(hidden_size, hidden_size, num_layers=1, bias=True,
                    batch_first=batch_first))

    init_lstm_(self.rnn_layers[0].bidir, init_weight)
    for lstm in self.rnn_layers[1:]:
        init_lstm_(lstm, init_weight)

    self.dropout = nn.Dropout(p=dropout)

    self.share_embedding = (embedder is not None)

    if embedder is not None:
        self.embedder = embedder
    else:
        self.embedder = nn.Embedding(vocab_size, hidden_size,
                                     padding_idx=config.PAD)
        nn.init.uniform_(self.embedder.weight.data, -init_weight,
                         init_weight)
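
# A hedged, self-contained sketch of the layer sizes the encoder constructor
# sets up. EmuBidirLSTM is approximated here by a plain bidirectional nn.LSTM
# (an assumption, not the real class): the bidirectional first layer doubles
# the feature dimension, the second layer maps 2 * hidden_size back to
# hidden_size, and the remaining layers keep hidden_size -> hidden_size with
# residual additions in the forward pass (defined elsewhere).
import torch
import torch.nn as nn

seq_len, batch, hidden_size = 6, 2, 1024

layer0 = nn.LSTM(hidden_size, hidden_size, num_layers=1, bias=True,
                 bidirectional=True)                     # out: 2 * hidden_size
layer1 = nn.LSTM(2 * hidden_size, hidden_size, num_layers=1, bias=True)
layer2 = nn.LSTM(hidden_size, hidden_size, num_layers=1, bias=True)

x = torch.randn(seq_len, batch, hidden_size)             # embedded tokens
x, _ = layer0(x)                                         # (seq, batch, 2048)
x, _ = layer1(x)                                         # (seq, batch, 1024)
out, _ = layer2(x)
x = x + out                                              # residual connection
print(x.shape)  # torch.Size([6, 2, 1024])
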