def __init__(self, config, input_size, embed_size, hidden_size, vocab, idx2word,
             num_layers=1, dropout=0.5, use_attn=True, use_peep=True, dec_len=50):
    super(Decoder, self).__init__()
    self.config = config
    self.input_size = input_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.output_size = input_size  # decoder predicts over the same vocabulary it reads
    self.dropout = nn.Dropout(p=dropout)
    self.embed = nn.Embedding(input_size, embed_size)
    self.rnn = nn.LSTM(embed_size, hidden_size, num_layers=num_layers,
                       dropout=dropout, bidirectional=False, batch_first=True)
    self.use_attn = use_attn
    self.use_peep = use_peep
    if use_attn:
        self.dropout_on_attn = nn.Dropout(p=self.config.dropout_attn_prob)
        # separate attention modules keyed by source side ('query' vs. 'parse')
        self.attn = nn.ModuleDict({
            'query': Attn(2 * hidden_size, hidden_size),
            'parse': Attn(2 * hidden_size, hidden_size)
        })
        if use_peep:  # whether to use latent variable z at each time step
            self.out = nn.ModuleDict({
                'query': nn.Linear(3 * hidden_size + self.config.latent_size, self.output_size),
                'parse': nn.Linear(3 * hidden_size + self.config.latent_size, self.output_size)
            })
        else:
            self.out = nn.ModuleDict({
                'query': nn.Linear(3 * hidden_size, self.output_size),
                'parse': nn.Linear(3 * hidden_size, self.output_size)
            })
    else:
        if use_peep:
            self.out = nn.Linear(hidden_size + self.config.latent_size, self.output_size)
        else:
            self.out = nn.Linear(hidden_size, self.output_size)
    self.word2idx = vocab
    self.idx2word = idx2word
    self.dec_len = dec_len
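# Hypothetical shape check (not part of the repo) illustrating why the output
# projections above expect 3 * hidden_size (+ latent_size when use_peep=True).
# Assumption: at each step the decoder concatenates its RNN output with one
# attention context per source ('query' and 'parse'), plus the latent z when
# peeping; the real forward pass may differ.
import torch

hidden_size, latent_size, batch = 256, 64, 4
rnn_out = torch.randn(batch, hidden_size)     # LSTM output at one time step
ctx_query = torch.randn(batch, hidden_size)   # context from the 'query' attention
ctx_parse = torch.randn(batch, hidden_size)   # context from the 'parse' attention
z = torch.randn(batch, latent_size)           # latent variable ("peep")

features = torch.cat([rnn_out, ctx_query, ctx_parse, z], dim=-1)
assert features.size(-1) == 3 * hidden_size + latent_size  # matches the nn.Linear sizes above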
def __init__(self, attn_model, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
    super(LuongAttnDecoderRNN, self).__init__()

    # Keep for reference
    self.attn_model = attn_model
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout = dropout

    # Define layers
    self.embedding = embedding
    self.embedding_dropout = nn.Dropout(dropout)
    self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                      dropout=(0 if n_layers == 1 else dropout))
    self.concat = nn.Linear(hidden_size * 2, hidden_size)
    self.out = nn.Linear(hidden_size, output_size)
    self.attn = Attn(attn_model, hidden_size)
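# The Attn module referenced above is not defined in this snippet. A minimal
# Luong-style sketch, assuming the usual 'dot' / 'general' / 'concat' scoring
# functions; the repo's actual Attn may differ.
import torch
import torch.nn as nn
import torch.nn.functional as F

class Attn(nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if method == 'general':
            self.attn = nn.Linear(hidden_size, hidden_size)
        elif method == 'concat':
            self.attn = nn.Linear(hidden_size * 2, hidden_size)
            self.v = nn.Parameter(torch.randn(hidden_size))

    def forward(self, hidden, encoder_outputs):
        # hidden: [1, batch, hidden], encoder_outputs: [max_len, batch, hidden]
        if self.method == 'dot':
            energy = torch.sum(hidden * encoder_outputs, dim=2)
        elif self.method == 'general':
            energy = torch.sum(hidden * self.attn(encoder_outputs), dim=2)
        else:  # 'concat'
            expanded = hidden.expand(encoder_outputs.size(0), -1, -1)
            energy = torch.sum(self.v * torch.tanh(
                self.attn(torch.cat((expanded, encoder_outputs), dim=2))), dim=2)
        # attention weights shaped [batch, 1, max_len]
        return F.softmax(energy.t(), dim=1).unsqueeze(1)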
def __init__(self, dm, dropout=0.1):
    super(EncoderBlock, self).__init__()
    self.pe = PositionalEncoding(dm, dropout)
    self.self_attn = Attn()
    self.ffn = PositionWiseFFN(dm, dm // 2)
    self.dropout = dropout
    self.highways = utils.clones(HighWay(dm, dropout), 2)
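# utils.clones and HighWay are defined elsewhere in the repo. Plausible minimal
# versions for reference only (assumptions, not the repo's actual code):
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F

def clones(module, n):
    # n independent deep copies of a module, as in the Annotated Transformer
    return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])

class HighWay(nn.Module):
    def __init__(self, dm, dropout=0.1):
        super(HighWay, self).__init__()
        self.transform = nn.Linear(dm, dm)
        self.gate = nn.Linear(dm, dm)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # gated mix of a transformed path and the identity path
        g = torch.sigmoid(self.gate(x))
        h = F.relu(self.transform(x))
        return self.dropout(g * h + (1.0 - g) * x)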
def __init__(self, vocab_size, embed_size, hidden_size, slot_size, intent_size, dropout=0.3, pad_idx=0):
    super(SDEN, self).__init__()
    self.pad_idx = pad_idx
    self.embed = nn.Embedding(vocab_size, embed_size, padding_idx=self.pad_idx)
    self.bigru_m = nn.GRU(embed_size, hidden_size, batch_first=True, bidirectional=True)
    self.bigru_c = nn.GRU(embed_size, hidden_size, batch_first=True, bidirectional=True)
    self.context_encoder = nn.Sequential(
        nn.Linear(hidden_size * 4, hidden_size * 2),
        nn.Sigmoid())
    self.Att = Attn('concat', hidden_size)
    self.context_encoder1 = nn.Sequential(
        nn.Linear(hidden_size * 8, hidden_size * 2),
        nn.Sigmoid())
    self.session_encoder = nn.GRU(hidden_size * 2, hidden_size * 2,
                                  batch_first=True, bidirectional=True)
    self.decoder_1 = nn.GRU(embed_size, hidden_size * 2,
                            batch_first=True, bidirectional=True)
    self.decoder_2 = nn.LSTM(hidden_size * 4, hidden_size * 2,
                             batch_first=True, bidirectional=True)
    self.intent_linear = nn.Linear(hidden_size * 4, intent_size)
    self.slot_linear = nn.Linear(hidden_size * 4, slot_size)
    self.dropout = nn.Dropout(dropout)
    self.attention = SelfAttention(hidden_size)
    self.att = SelfA(hidden_size)
    self.hidden_size = hidden_size
    # self.att = Attn('concat', 64)

    # Xavier-uniform init for weight matrices, zeros for 1-D params (biases)
    for param in self.parameters():
        if len(param.size()) > 1:
            nn.init.xavier_uniform_(param)
        else:
            param.data.zero_()
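# Hypothetical standalone demo (not part of the repo) of the init scheme used
# at the end of SDEN.__init__: Xavier-uniform for weight matrices, zeros for
# 1-D parameters such as biases.
import torch
import torch.nn as nn

toy = nn.GRU(8, 16, batch_first=True, bidirectional=True)
for param in toy.parameters():
    if len(param.size()) > 1:
        nn.init.xavier_uniform_(param)
    else:
        param.data.zero_()
assert all(p.abs().sum() == 0 for p in toy.parameters() if p.dim() == 1)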
def __init__(self, vocab_size, hidden_size, embed_size):
    super(Net, self).__init__()
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.embed_size = embed_size
    self.embedding = nn.Embedding(vocab_size, embed_size)
    self.gru = nn.GRU(embed_size, hidden_size)
    self.attn = Attn(hidden_size)
    self.fc1 = nn.Linear(hidden_size, hidden_size // 8)
    self.fc2 = nn.Linear(hidden_size // 8, 1)
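# Attn(hidden_size) above takes only a size argument, which usually points to
# an attention-pooling layer that collapses the GRU outputs into one vector
# before the fc1/fc2 scorer. A guessed minimal version (the real module may be
# implemented differently):
import torch
import torch.nn as nn
import torch.nn.functional as F

class Attn(nn.Module):
    def __init__(self, hidden_size):
        super(Attn, self).__init__()
        self.score = nn.Linear(hidden_size, 1)

    def forward(self, outputs):
        # outputs: [batch, seq_len, hidden] -> pooled [batch, hidden]
        weights = F.softmax(self.score(outputs).squeeze(-1), dim=-1)
        return torch.bmm(weights.unsqueeze(1), outputs).squeeze(1)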
def __init__(self, batch_size, inputs_size, vocab_size, hidden_size,
             dropout_p=0.01, gpu=True):
    super(ATTNDecoder, self).__init__()
    self.num_layers = 1
    self.batch_size = batch_size
    self.inputs_size = inputs_size
    self.hidden_size = hidden_size
    self.dropout_p = dropout_p
    self.vocab_size = vocab_size
    self.gpu = gpu
    self.embedding = nn.Embedding(vocab_size, inputs_size, padding_idx=0)
    self.dropout = nn.Dropout(dropout_p)
    self.gru = nn.GRU(inputs_size * 2, hidden_size, batch_first=True)
    self.attention = Attn('concat', hidden_size)
    self.out = nn.Linear(hidden_size, vocab_size)
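# Hypothetical shape walk-through (one plausible reading: the GRU input of
# size inputs_size * 2 comes from concatenating the embedded token with a
# fed-back context vector, as in input-feeding decoders; the repo's actual
# forward may differ).
import torch

batch, inputs_size = 4, 128
embedded = torch.randn(batch, 1, inputs_size)    # embedded previous token
context = torch.randn(batch, 1, inputs_size)     # vector fed back alongside it
gru_in = torch.cat([embedded, context], dim=2)   # [batch, 1, inputs_size * 2]
assert gru_in.size(2) == inputs_size * 2         # matches nn.GRU(inputs_size * 2, ...)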
def __init__(self, attn_model, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
    """
    Args:
        attn_model: 'dot', 'general', or 'concat'
        embedding: nn.Embedding layer used to embed the current input word
            (giving shape [1, batch_size, embedding_size] per step)
        hidden_size: int
        output_size: vocabulary.num_words
    """
    super(LuongAttnDecoderRNN, self).__init__()

    # Keep for reference
    self.attn_model = attn_model
    self.embedding = embedding
    self.embedding_size = embedding.embedding_dim
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout = dropout

    # Define layers
    self.embedding_dropout = nn.Dropout(dropout)
    self.gru = nn.GRU(self.embedding_size, hidden_size, n_layers,
                      dropout=(0 if n_layers == 1 else dropout))
    # applies a linear transformation to the incoming data: y = x A^T + b
    self.concat = nn.Linear(hidden_size * 2, hidden_size)
    self.out = nn.Linear(hidden_size, output_size)
    self.attn = Attn(attn_model, hidden_size)
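# Hypothetical single-step walk-through (not the repo's forward method),
# following the usual Luong-attention pattern that the layers above imply:
# embed -> GRU -> attention weights -> context -> concat + tanh -> vocab
# distribution. Dot-product scoring and toy sizes are used purely for
# illustration.
import torch
import torch.nn as nn
import torch.nn.functional as F

batch, embedding_size, hidden_size, output_size, max_len = 4, 300, 256, 7000, 10
embedding = nn.Embedding(output_size, embedding_size)  # stands in for the passed-in embedding
gru = nn.GRU(embedding_size, hidden_size, 1)
concat = nn.Linear(hidden_size * 2, hidden_size)
out = nn.Linear(hidden_size, output_size)

input_step = torch.randint(0, output_size, (1, batch))      # current input word ids
last_hidden = torch.zeros(1, batch, hidden_size)
encoder_outputs = torch.randn(max_len, batch, hidden_size)

embedded = embedding(input_step)                             # [1, batch, embedding_size]
rnn_output, hidden = gru(embedded, last_hidden)              # [1, batch, hidden]
scores = torch.sum(rnn_output * encoder_outputs, dim=2)      # dot scores [max_len, batch]
attn_weights = F.softmax(scores.t(), dim=1).unsqueeze(1)     # [batch, 1, max_len]
context = attn_weights.bmm(encoder_outputs.transpose(0, 1))  # [batch, 1, hidden]
concat_input = torch.cat((rnn_output.squeeze(0), context.squeeze(1)), dim=1)
output = F.softmax(out(torch.tanh(concat(concat_input))), dim=1)  # [batch, output_size]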