def __init__(self, rnn_type, embedding_dim, hidden_dim, vocab_size,
             max_seq_len, n_layers=1, dropout=0.1, word_dropout=None):
    """Build an attention decoder around the project's custom GRU.

    Args:
        rnn_type: Name of the recurrent unit. Only ``'CustomGRU'`` is
            implemented.
        embedding_dim: Input feature size handed to the recurrent layer.
        hidden_dim: Hidden size used by the recurrent layer, the attention
            module and the input of the output projection.
        vocab_size: Output size of the final linear projection.
        max_seq_len: Stored on the instance; not used by this constructor.
        n_layers: Number of recurrent layers passed to ``CustomGRU``.
        dropout: Probability for ``self.drop``; also forwarded to
            ``CustomGRU``.
        word_dropout: Stored on the instance; not used by this constructor.

    Raises:
        NotImplementedError: If ``rnn_type`` is not ``'CustomGRU'``.
    """
    super(CustomAttnDecoderRNN, self).__init__()
    self.hidden_dim = hidden_dim
    self.embedding_dim = embedding_dim
    self.max_seq_len = max_seq_len
    self.vocab_size = vocab_size
    self.rnn_type = rnn_type
    self.n_layers = n_layers

    if self.rnn_type != 'CustomGRU':
        # The previous bare `raise` surfaced as an opaque
        # RuntimeError("No active exception to reraise"), and 'CustomLSTM'
        # could slip through without ever creating `self.rnn`.
        # NotImplementedError subclasses RuntimeError, so existing handlers
        # that caught the old error keep working.
        raise NotImplementedError(
            "Unsupported rnn_type %r: only 'CustomGRU' is implemented"
            % (rnn_type,)
        )
    self.rnn = CustomGRU(CustomGRUCell, embedding_dim, hidden_dim, n_layers,
                         dropout=dropout)

    self.attn = Attn('concat', hidden_dim)
    self.rnn2out = nn.Linear(hidden_dim, vocab_size)
    self.drop = nn.Dropout(dropout)
    self.dropout = dropout
    self.word_dropout = word_dropout
def __init__(self, attn_model, embedding, hidden_size, output_size,
             n_layers=1, dropout=0.1):
    """Set up a GRU decoder with Luong-style attention.

    Args:
        attn_model: Attention variant name forwarded to ``Attn``.
        embedding: Embedding module supplied by the caller; stored as-is.
        hidden_size: Input and hidden size of the GRU; also sizes the
            ``concat`` and ``out`` projections.
        output_size: Output size of the final linear layer.
        n_layers: Number of GRU layers.
        dropout: Probability for the embedding dropout; also used between
            GRU layers when there is more than one layer.
    """
    super(LuongAttnDecoderRNN, self).__init__()

    # Hyper-parameters kept for later reference.
    self.attn_model = attn_model
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout = dropout

    # A single-layer GRU has no inter-layer connection to apply dropout to.
    inter_layer_dropout = dropout if n_layers != 1 else 0

    # Layers (created in the same order as before so parameter
    # registration and initialisation order are unchanged).
    self.embedding = embedding
    self.embedding_dropout = nn.Dropout(dropout)
    self.gru = nn.GRU(
        hidden_size,
        hidden_size,
        n_layers,
        dropout=inter_layer_dropout,
    )
    self.concat = nn.Linear(2 * hidden_size, hidden_size)
    self.out = nn.Linear(hidden_size, output_size)

    # Attention layer.
    self.attn = Attn(attn_model, hidden_size)
def __init__(self, rnn_type, attn_model, embedding_dim, hidden_dim,
             output_size, n_layers=1, dropout=0.5):
    """Set up an LSTM/GRU decoder with optional Luong-style attention.

    Args:
        rnn_type: ``'LSTM'`` or ``'GRU'``; selects the ``torch.nn`` class.
        attn_model: Attention variant forwarded to ``Attn``. The string
            ``'none'`` disables attention, in which case ``self.attn`` is
            not created.
        embedding_dim: Embedding size and input size of the recurrent layer.
        hidden_dim: Hidden size of the recurrent layer.
        output_size: Vocabulary size; sizes the embedding table and the
            output projection.
        n_layers: Number of recurrent layers.
        dropout: Probability for the embedding dropout; also used between
            recurrent layers when there is more than one layer.

    Raises:
        ValueError: If ``rnn_type`` is neither ``'LSTM'`` nor ``'GRU'``.
    """
    super(LuongAttnDecoderRNN, self).__init__()

    # Fail at construction time instead of leaving `self.rnn` undefined,
    # which would only surface later as an AttributeError.
    if rnn_type not in ('LSTM', 'GRU'):
        raise ValueError(
            "Unsupported rnn_type %r: expected 'LSTM' or 'GRU'" % (rnn_type,)
        )

    # Keep for reference
    self.rnn_type = rnn_type
    self.attn_model = attn_model
    self.hidden_dim = hidden_dim
    self.output_size = output_size
    self.vocab_size = output_size
    self.n_layers = n_layers
    self.dropout = dropout
    self.embedding_dim = embedding_dim

    # Define layers
    self.embeddings = nn.Embedding(output_size, embedding_dim)
    self.embedding_dropout = nn.Dropout(dropout)
    # torch.nn RNNs apply dropout only between stacked layers; passing a
    # non-zero value with a single layer has no effect and emits a warning.
    self.rnn = getattr(nn, self.rnn_type)(
        embedding_dim,
        hidden_dim,
        n_layers,
        dropout=(0 if n_layers == 1 else dropout),
    )
    self.concat = nn.Linear(hidden_dim * 2, hidden_dim)
    self.out = nn.Linear(hidden_dim, output_size)

    # Choose attention model
    print(attn_model)
    if attn_model != 'none':
        self.attn = Attn(attn_model, hidden_dim)
def __init__(self, hidden_size, output_size, dropout_p=0.1):
    """Set up a single-layer LSTM decoder with 'concat' attention.

    Args:
        hidden_size: Embedding size and LSTM input/hidden size.
        output_size: Vocabulary size; sizes the embedding table and the
            output projection.
        dropout_p: Probability used by ``self.dropout``.
    """
    super(AttnDecoder, self).__init__()
    # NOTE(review): this module is built with a fixed size of 64 rather than
    # `hidden_size`; left untouched because changing it would alter the
    # parameter shapes. Confirm whether it is still used.
    self.attn_model = Attn('general', 64)
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.dropout_p = dropout_p

    self.embedding = nn.Embedding(output_size, hidden_size)
    self.dropout = nn.Dropout(dropout_p)
    self.attn = Attn('concat', hidden_size)
    # nn.LSTM applies dropout only between stacked layers. With a single
    # layer a non-zero value does nothing except emit a warning, so pass 0.
    self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size,
                        num_layers=1, dropout=0)
    self.out = nn.Linear(hidden_size, output_size)
    # NOTE(review): LogSoftmax without `dim` relies on the deprecated
    # implicit-dimension behaviour; pick an explicit dim once the shape fed
    # to it in forward() is confirmed.
    self.softmax = nn.LogSoftmax()
def __init__(self, input_size, hidden_size, batch_size, learning_rate,
             method, num_layers=1):
    """Create the dataset loader, models, loss and optimizers for training.

    Args:
        input_size: First argument forwarded to ``Encoder``.
        hidden_size: Hidden size shared by the encoder, decoder and
            attention module.
        batch_size: Batch size of the shuffled ``DataLoader``.
        learning_rate: Learning rate for both Adam optimizers.
        method: Attention method name forwarded to ``Attn`` and ``Decoder``.
        num_layers: Number of recurrent layers for the encoder and decoder.
    """
    dataset = Seq2SeqDataset()
    self.data_loader = DataLoader(dataset=dataset, batch_size=batch_size,
                                  shuffle=True)
    self.vocab = dataset.vocab
    self.output_size = len(self.vocab)
    self.char2index, self.index2char = self.data_index()

    self.input_size = input_size
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    # Previously hard-coded to 1, which silently ignored the argument.
    self.num_layers = num_layers
    self.method = method
    self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    self.attn = Attn(method, hidden_size).to(self.device)
    self.encoder = Encoder(input_size, hidden_size, self.output_size,
                           self.num_layers).to(self.device)
    self.decoder = Decoder(hidden_size, self.output_size, method,
                           self.num_layers).to(self.device)

    self.loss_function = NLLLoss()
    self.encoder_optim = torch.optim.Adam(self.encoder.parameters(),
                                          lr=self.learning_rate)
    self.decoder_optim = torch.optim.Adam(self.decoder.parameters(),
                                          lr=self.learning_rate)
def __init__(self, hidden_size, output_size, method, num_layers=1):
    """Assemble the embedding, LSTM, attention and projection layers.

    Args:
        hidden_size: Embedding size and LSTM input/hidden size.
        output_size: Vocabulary size; sizes the embedding table and the
            output projection.
        method: Attention method name forwarded to ``Attn``.
        num_layers: Number of stacked LSTM layers.
    """
    super(Decoder, self).__init__()

    # Configuration kept on the instance.
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.num_layers = num_layers
    self.method = method

    self.embedding = nn.Embedding(output_size, hidden_size)

    # Batch-first LSTM, moved to `device` (a name resolved outside this
    # method) right after construction.
    recurrent = nn.LSTM(
        input_size=hidden_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        batch_first=True,
    )
    self.lstm = recurrent
    self.lstm = recurrent.to(device)

    self.attn = Attn(method, hidden_size)
    self.concat = nn.Linear(2 * hidden_size, hidden_size)
    self.tanh = nn.Tanh()
    self.out = nn.Linear(hidden_size, output_size)
    self.softmax = nn.LogSoftmax(dim=1)
def __init__(self, attn_model, embedding_dim, hidden_size, output_size,
             unit='gru', n_layers=1, dropout=0.1, embedding=None,
             latent_dim=300, bidirectional=True):
    """Set up a GRU/LSTM decoder with optional attention.

    Args:
        attn_model: Attention variant forwarded to ``Attn``; a falsy value
            skips creating ``self.attn``.
        embedding_dim: Input size of the recurrent layer.
        hidden_size: Hidden size of the recurrent layer; also sizes the
            ``concat`` and ``out`` projections.
        output_size: Output size of the final linear layer.
        unit: ``'gru'`` selects ``nn.GRU``; any other value selects
            ``nn.LSTM``.
        n_layers: Number of recurrent layers.
        dropout: Probability for the embedding dropout; also used between
            recurrent layers when there is more than one layer.
        embedding: Optional embedding object; stored as ``self.embedding``
            when provided.
        latent_dim: Stored on the instance; not used by this constructor.
        bidirectional: Stored on the instance; not used by this constructor.
    """
    super(DecoderRNN, self).__init__()
    self.unit = unit
    self.softmax = F.softmax
    self.n_layers = n_layers
    self.attn_model = attn_model
    self.latent_dim = latent_dim
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.bidirectional = bidirectional

    self.embedding_dropout = nn.Dropout(dropout)
    # Recurrent dropout only acts between stacked layers.
    self.dropout = (0 if n_layers == 1 else dropout)
    self.out = nn.Linear(self.hidden_size, self.output_size)

    # Compare against None explicitly: truth-testing a multi-element tensor
    # raises, and an empty container module would be treated as "missing".
    if embedding is not None:
        self.embedding = embedding

    if unit == 'gru':
        self.rnn = nn.GRU(embedding_dim, hidden_size, n_layers,
                          dropout=self.dropout)
    else:
        self.rnn = nn.LSTM(embedding_dim, hidden_size, n_layers,
                           dropout=self.dropout)

    self.concat = nn.Linear(hidden_size * 2, hidden_size)
    if attn_model:
        self.attn = Attn(attn_model, hidden_size)
def __init__(self, rnn_type, embedding_dim, hidden_dim, output_size,
             n_layers=1, dropout=0.1):
    """Set up an LSTM/GRU decoder with 'concat' (Bahdanau-style) attention.

    Args:
        rnn_type: ``'LSTM'`` or ``'GRU'``; selects the ``torch.nn`` class.
        embedding_dim: Size of the token embeddings.
        hidden_dim: Hidden size of the recurrent layer and attention module.
        output_size: Vocabulary size; sizes the embedding table and the
            output projection.
        n_layers: Number of recurrent layers.
        dropout: Probability for the embedding dropout; also used between
            recurrent layers when there is more than one layer.

    Raises:
        ValueError: If ``rnn_type`` is neither ``'LSTM'`` nor ``'GRU'``.
    """
    super(BahdanauAttnDecoderRNN, self).__init__()

    # Fail at construction time instead of leaving `self.rnn` undefined.
    if rnn_type not in ('LSTM', 'GRU'):
        raise ValueError(
            "Unsupported rnn_type %r: expected 'LSTM' or 'GRU'" % (rnn_type,)
        )

    # Define parameters
    self.rnn_type = rnn_type
    self.hidden_size = hidden_dim
    self.output_size = output_size
    self.vocab_size = output_size
    self.n_layers = n_layers
    self.dropout = dropout
    self.embedding_dim = embedding_dim

    # Define layers
    self.embeddings = nn.Embedding(output_size, embedding_dim)
    self.embedding_dropout = nn.Dropout(dropout)
    self.attn = Attn('concat', hidden_dim)
    # The recurrent input is sized embedding_dim + hidden_dim.
    # Dropout inside torch.nn RNNs only acts between stacked layers, so a
    # single-layer network gets 0 to avoid the no-op warning.
    self.rnn = getattr(nn, self.rnn_type)(
        embedding_dim + hidden_dim,
        hidden_dim,
        n_layers,
        dropout=(0 if n_layers == 1 else dropout),
    )
    # The projection input is sized hidden_dim * 2.
    self.out = nn.Linear(hidden_dim * 2, output_size)
# Flattened excerpt: the statements below were originally on separate lines.
# In order, the line contains:
#   1. The tail of a method whose beginning lies outside this excerpt; it
#      returns `loss.data[0]` together with `decoder_outputs`.
#   2. `train(self)`: iterates `self.num_epoch` epochs and the batches of
#      `self.data_loader`, calling `self.step` on every batch. It also calls
#      `self.step` on a fixed English/French sentence pair and prints the
#      epoch index, that call's second return value, and the first row of
#      `self.convert2ind` for the reference sentence. How these statements
#      nest inside the two loops cannot be recovered from the flattened text.
#   3. Script statements: build `Translate()`, convert 'Lâche' with
#      `convert2ind`, run the encoder, apply a 'general' `Attn` to the
#      hidden state and encoder output, and print the result and its size.
# NOTE(review): `loss.data[0]` indexes the loss tensor; if the loss is a
# 0-dim tensor (PyTorch >= 0.4) this needs `loss.item()` — confirm the
# targeted PyTorch version.
return loss.data[0], decoder_outputs def train(self): for j in range(self.num_epoch): for i, (x_data, y_data) in enumerate(self.data_loader): loss, result = self.step(x_data, y_data) _, x = self.step(['Be fair.'], ['Sois équitable !']) print('Epoch' , j) print(x) print('-------> ', self.convert2ind('Sois équitable !').cpu().numpy()[0]) trans = Translate() encoder_input = trans.convert2ind('Lâche') encoder_output, (hidden_state, cell_state) = trans.encoder(encoder_input) attn = Attn('general', trans.hidden_size) energy = attn(hidden_state, encoder_output) print(energy) print(energy.size())
class TrainBatch():
    """Character-level seq2seq trainer.

    Owns the data loader, the vocabulary lookups, the encoder/decoder pair,
    their Adam optimizers, and the helpers that turn batches of strings into
    zero-padded index tensors.
    """

    def __init__(self, input_size, hidden_size, batch_size, learning_rate,
                 method, num_layers=1):
        """Create the dataset loader, models, loss and optimizers.

        Args:
            input_size: First argument forwarded to ``Encoder``.
            hidden_size: Hidden size shared by encoder, decoder and attention.
            batch_size: Batch size of the shuffled ``DataLoader``.
            learning_rate: Learning rate for both Adam optimizers.
            method: Attention method name forwarded to ``Attn``/``Decoder``.
            num_layers: Number of recurrent layers for encoder and decoder.
        """
        dataset = Seq2SeqDataset()
        self.data_loader = DataLoader(dataset=dataset, batch_size=batch_size,
                                      shuffle=True)
        self.vocab = dataset.vocab
        self.output_size = len(self.vocab)
        self.char2index, self.index2char = self.data_index()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        # Previously hard-coded to 1, which silently ignored the argument.
        self.num_layers = num_layers
        self.method = method
        self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

        self.attn = Attn(method, hidden_size).to(self.device)
        self.encoder = Encoder(input_size, hidden_size, self.output_size,
                               self.num_layers).to(self.device)
        self.decoder = Decoder(hidden_size, self.output_size, method,
                               self.num_layers).to(self.device)

        self.loss_function = NLLLoss()
        self.encoder_optim = torch.optim.Adam(self.encoder.parameters(),
                                              lr=self.learning_rate)
        self.decoder_optim = torch.optim.Adam(self.decoder.parameters(),
                                              lr=self.learning_rate)

    def word_to_index(self, word):
        """Map each character of ``word`` to its vocabulary index.

        Returns:
            A 1-D ``LongTensor`` on ``self.device``.

        Raises:
            KeyError: If a character is missing from ``self.char2index``.
        """
        indices = [self.char2index[ch] for ch in word]
        return torch.LongTensor(indices).to(self.device)

    def create_batch_tensor(self, batch_word, batch_len):
        """Stack words into a zero-padded ``(batch, max(batch_len))`` tensor.

        Args:
            batch_word: Sequence of words (each an iterable of characters).
            batch_len: Length of each word, in the same order.
        """
        batch_size = len(batch_word)
        seq_tensor = torch.zeros([batch_size, max(batch_len)],
                                 dtype=torch.long, device=self.device)
        for row, (word, length) in enumerate(zip(batch_word, batch_len)):
            seq_tensor[row, :length] = self.word_to_index(word)
        return seq_tensor

    def create_batch(self, input, target):
        """Convert parallel string batches into padded index tensors.

        Pairs are sorted by input length, longest first, before padding.

        Returns:
            ``(input_seq, input_len, target_seq)`` on ``self.device``, all
            following the sorted pair order.
        """
        pairs = sorted(
            zip((list(w) for w in input), (list(w) for w in target)),
            key=lambda pair: len(pair[0]),
            reverse=True,
        )
        input_seq, target_seq = zip(*pairs)
        input_len = [len(w) for w in input_seq]
        target_len = [len(w) for w in target_seq]

        input_tensor = self.create_batch_tensor(input_seq, input_len)
        length_tensor = torch.LongTensor(input_len).to(self.device)
        target_tensor = self.create_batch_tensor(target_seq, target_len)
        return (self.create_tensor(input_tensor),
                self.create_tensor(length_tensor),
                self.create_tensor(target_tensor))

    def get_len(self, input):
        """Return the length of every item of ``input`` as a ``LongTensor``.

        The lengths follow the order of ``input`` as given (not the sorted
        order produced by ``create_batch``).
        """
        lengths = [len(list(w)) for w in input]
        return torch.LongTensor(lengths).to(self.device)

    def data_index(self):
        """Build ``(char2index, index2char)`` from ``self.vocab``.

        Indices follow the enumeration order of ``self.vocab``.
        """
        char2index = {ch: idx for idx, ch in enumerate(self.vocab)}
        index2char = {idx: ch for ch, idx in char2index.items()}
        return char2index, index2char

    def create_tensor(self, tensor):
        """Move ``tensor`` to ``self.device`` and wrap it in ``Variable``."""
        return Variable(tensor.to(self.device))

    def create_mask(self, tensor):
        """Return a mask that is true wherever ``tensor`` is greater than 0."""
        return self.create_tensor(torch.gt(tensor, 0))

    def mask_NLLLoss(self, inp, target, mask):
        """Mean negative log of the gathered values over masked positions.

        Args:
            inp: Tensor gathered along dim 2 with ``target``; its values are
                passed straight to ``torch.log``.
            target: Index tensor used for the gather along dim 2.
            mask: Boolean mask selecting which positions contribute.

        Returns:
            ``(loss, n_total)`` where ``n_total`` is ``mask.sum()`` as a
            Python number.
        """
        n_total = mask.sum()
        cross_entropy = -torch.log(torch.gather(inp, 2, target))
        loss = cross_entropy.masked_select(mask).mean()
        loss = loss.to(self.device)
        return loss, n_total.item()

    def sequence_mask(self, sequence_length, max_len=None):
        """Build a ``(batch, max_len)`` mask that is true below each length.

        Args:
            sequence_length: 1-D tensor of lengths.
            max_len: Number of mask columns; defaults to the largest length.
        """
        if max_len is None:
            max_len = sequence_length.max()
        max_len = int(max_len)
        # torch.arange replaces the deprecated torch.range(0, max_len - 1);
        # both yield the integers 0 .. max_len - 1.
        positions = torch.arange(max_len, device=sequence_length.device)
        return positions.unsqueeze(0) < sequence_length.unsqueeze(1)

    def masked_cross_entropy(self, output, target, length):
        """Cross entropy summed over non-zero targets, divided by total length.

        Args:
            output: Unnormalised scores, viewed here as
                ``(-1, output.size(2))``.
            target: Index tensor; flattened to one column to match.
            length: Tensor of sequence lengths; only its sum is used.

        Positions whose target index is 0 are excluded from the sum.
        """
        flat_scores = output.view(-1, output.size(2))
        log_probs = F.log_softmax(flat_scores, 1)
        flat_target = target.view(-1, 1)
        losses = -torch.gather(log_probs, 1, flat_target)
        # Compare against a scalar so the mask lives on the target's own
        # device; the previous hard-coded 'cuda:0' failed on CPU-only hosts.
        mask = flat_target.gt(0)
        masked_losses = losses * mask.float()
        return masked_losses.sum() / length.float().sum()

    def step(self, input, target):
        """Run one optimisation step on a batch of string pairs.

        At each decoding position the next decoder input is the ground-truth
        token when ``rd.random() > 0.5`` and the decoder's own top-1
        prediction otherwise.

        Returns:
            ``(loss, predictions)``: the scalar loss as a Python float and
            the per-position top-1 indices as a nested list.
        """
        input_seq, input_len, target_seq = self.create_batch(input, target)
        encoder_output, (hidden_state, cell_state) = self.encoder(input_seq,
                                                                  input_len)

        batch_size = input_seq.size(0)
        max_len = target_seq.size(1)
        decoder_output = torch.zeros([batch_size, max_len, self.output_size],
                                     device=self.device)

        # Every sequence starts from the index of the '_' character.
        decoder_input = self.create_tensor(
            torch.full((batch_size, 1), self.char2index['_'],
                       dtype=torch.long, device=self.device))
        output_tensor = torch.zeros([batch_size, max_len])

        for i in range(max_len):
            output, (hidden_state, cell_state) = self.decoder(
                decoder_input, (hidden_state, cell_state), encoder_output)
            predicted = output.topk(1)[1]
            if rd.random() > 0.5:
                decoder_input = target_seq[:, i].unsqueeze(1)
            else:
                decoder_input = predicted
            output_tensor[:, i] = predicted.squeeze(1)
            decoder_output[:, i] = output

        target_len = self.get_len(target)
        loss_ = self.masked_cross_entropy(decoder_output, target_seq,
                                          target_len)

        self.encoder_optim.zero_grad()
        self.decoder_optim.zero_grad()
        loss_.backward()
        self.encoder_optim.step()
        self.decoder_optim.step()
        return loss_.item(), output_tensor.cpu().tolist()