graph_train, graph_val = [], []
best_val_loss = 100.0
best_bleu_score = 0.0
best_val_acc = 0.0
n_epochs = 200

train_remainder1 = len(train_data1) % N
val_remainder1 = len(val_data1) % N

for epoch in range(n_epochs):
    start_time = time.time()
    train_loss, train_acc = 0.0, 0.0
    val_loss, val_acc = 0.0, 0.0
    correct = 0
    total_loss = 0
    total_val_len = 0

    en_hidden = encoder.init_hidden()
    de_hidden = decoder.init_hidden()
    encoder.train()
    decoder.train()

    for batch in range(0, len(train_data1) - train_remainder1, N):
        loss = 0
        data_raw1 = train_data1[batch:batch + N]
        data_raw2 = train_data2[batch:batch + N]
        target_raw1 = train_target1[batch:batch + N]
        target_raw2 = train_target2[batch:batch + N]

        ### lang 1
        data1, target1, _seq_len_x1, seq_len_y1 = google_train_vectorize(
import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from sklearn.metrics import balanced_accuracy_score, f1_score

# `Encoder`, `config`, and `KeplerDataset` are defined elsewhere in the project.


class KeplerModel(pl.LightningModule):

    def __init__(self):
        super(KeplerModel, self).__init__()
        # Initialize model parameters using config properties
        self.model = Encoder(config['seq_length'], config['hidden_size'],
                             config['output_dim'], config['n_layers'])
        # Initialize a cross-entropy loss criterion for training
        self.criterion = torch.nn.CrossEntropyLoss()

    # Define a forward pass of the model
    def forward(self, x, h):
        return self.model.forward(x, h)

    def training_step(self, batch, batch_idx):
        # Set model to training mode
        self.model.train()
        # Unpack data and labels from batch
        x, y = batch
        # Reshape data into shape (batch_size, 1, seq_length)
        x = x.view(x.size(0), -1, x.size(1))
        # Initialize the hidden state for the forward pass
        h = self.model.init_hidden(x.size(0))
        # Zero out the model gradients to avoid accumulation
        self.model.zero_grad()
        # Forward pass through the model
        out, h = self.forward(x, h)
        # Calculate cross-entropy loss
        loss = self.criterion(out, y.long().squeeze())
        # Obtain class labels
        y_hat = torch.max(out, 1)[1]
        # Compute the balanced accuracy (weighted by the number of examples in each class);
        # move tensors to CPU so sklearn can consume them
        accuracy = balanced_accuracy_score(y.cpu(), y_hat.cpu())
        # Compute the weighted F1 score to account for class imbalance
        f1 = f1_score(y.cpu(), y_hat.cpu(), average='weighted')
        # Create metric object for TensorBoard logging
        tensorboard_logs = {
            'train_loss': loss.item(),
            'accuracy': accuracy,
            'f1': f1
        }
        return {'loss': loss, 'log': tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        # Set model to eval mode
        self.model.eval()
        # Unpack data and labels from batch
        x, y = batch
        # Initialize hidden state
        h = self.model.init_hidden(x.size(0))
        # Reshape data into shape (batch_size, 1, seq_length)
        x = x.view(x.size(0), -1, x.size(1))
        # Calculate forward pass of the model
        out, h = self.forward(x, h)
        # Calculate cross-entropy loss
        loss = self.criterion(out, y.long().squeeze())
        # Calculate class indices
        y_hat = torch.max(out, 1)[1]
        # Calculate balanced accuracy
        val_accuracy = torch.Tensor([balanced_accuracy_score(y.cpu(), y_hat.cpu())])
        # Calculate weighted F1 score
        val_f1 = torch.Tensor([f1_score(y.cpu(), y_hat.cpu(), average='weighted')])
        # Create a metrics object
        metrics = {
            'val_loss': loss,
            'val_accuracy': val_accuracy,
            'val_f1': val_f1
        }
        return metrics

    def validation_end(self, outputs):
        # OPTIONAL: aggregate the per-batch validation metrics
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['val_accuracy'] for x in outputs]).mean()
        avg_f1 = torch.stack([x['val_f1'] for x in outputs]).mean()
        tensorboard_logs = {
            'val_loss': avg_loss,
            'val_acc': avg_acc,
            'val_f1': avg_f1
        }
        return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.001)

    @pl.data_loader
    def train_dataloader(self):
        # REQUIRED
        return DataLoader(KeplerDataset(mode="train"), batch_size=64, shuffle=True)

    @pl.data_loader
    def val_dataloader(self):
        # REQUIRED
        return DataLoader(KeplerDataset(mode="test"), batch_size=128, shuffle=True)
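# The module above targets an older pytorch-lightning API (note the
# @pl.data_loader decorators and the validation_end hook). A minimal,
# illustrative entry point for training it is sketched below; the
# max_epochs value is an example, not a setting taken from the source.
if __name__ == "__main__":
    model = KeplerModel()
    trainer = pl.Trainer(max_epochs=50)
    trainer.fit(model)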
# P is the generator/decoder and D the discriminator; Q (defined earlier in
# the script) acts as the encoder.
P = Generator(hidden_size, hidden_size, input_size, use_cuda=use_cuda)
D = Discriminator(input_size, hidden_size, 64, use_cuda=use_cuda)

# The encoder and generator share one optimizer; the discriminator gets its own.
G_optimizer = optim.Adam(chain(Q.parameters(), P.parameters()),
                         lr=5e-5, betas=(0.5, 0.999))
D_optimizer = optim.Adam(D.parameters(), lr=5e-5, betas=(0.5, 0.999))
criterion = nn.CrossEntropyLoss()

# Pre-sample 100k random training pairs from the parallel corpus.
training_pairs = [
    variables_from_pair(input_lang, output_lang, random.choice(pairs))
    for i in range(int(1e5))
]

h_Q = Q.init_hidden(batch)
h_P = P.init_hidden(batch)
h_D_enc = D.init_hidden(batch)
h_D_gen = D.init_hidden(batch)

for epoch in range(int(1e5)):
    # Take the source sentence of the current pair and add a batch dimension.
    x = training_pairs[epoch][0]
    x = x.unsqueeze(0).cpu()

    reset_grad([Q, P, D])

    # One-hot encode the token indices and allocate an all-zero latent sequence.
    x = to_onehot(x.squeeze()).unsqueeze(0)
    z = Variable(torch.zeros(batch, x.size(1), hidden_size))

    # Encode the input sequence.
    E_x, h_E = Q(x.cpu(), h_Q.cpu())
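# `reset_grad` is called above but not defined in this excerpt. A minimal
# sketch, under the assumption that it simply clears the accumulated
# gradients of each network before the next update:
def reset_grad(nets):
    for net in nets:
        net.zero_grad()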
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# `F2EDataSet`, `Encoder`, `Decoder`, `translate`, and the hyperparameters
# (max_seq_len, batch_size, embed_size, hidden_size, num_layers,
# attention_size, drop_prob, lr, random_sample_k, num_epochs, device)
# are defined elsewhere in the project.


def main():
    data_set = F2EDataSet(max_length=max_seq_len)
    loader = DataLoader(data_set, batch_size=batch_size, shuffle=True)

    encoder = Encoder(data_set.in_lang.token_n,
                      embed_size=embed_size,
                      hidden_size=hidden_size,
                      num_layers=num_layers,
                      drop_prob=drop_prob).to(device)
    decoder = Decoder(vocab_size=data_set.out_lang.token_n,
                      embed_size=embed_size,
                      hidden_size=hidden_size,
                      num_layers=num_layers,
                      attention_size=attention_size,
                      drop_prob=drop_prob).to(device)

    enc_optimizer = optim.Adam(encoder.parameters(), lr=lr)
    dec_optimizer = optim.Adam(decoder.parameters(), lr=lr)
    # Per-token loss; reduction is done manually so padding can be masked out.
    criteon = nn.CrossEntropyLoss(reduction='none').to(device)

    # Fix a few sample sentences up front so translation quality can be
    # inspected on the same inputs as training progresses.
    random_sample_sentences = data_set.random_sample(k=random_sample_k)
    sample_in_indices = []
    for in_sentence, out_sentence in random_sample_sentences:
        sample_in_indices.append(
            data_set.convert_token_to_index(data_set.in_lang, in_sentence))
    # sample_in_indices: shape [random_sample_k, max_len], dtype: int64
    sample_in_indices = torch.LongTensor(sample_in_indices).to(device)
    # sample_in_indices: [random_sample_k, 1, max_len]
    sample_in_indices = torch.unsqueeze(sample_in_indices, dim=1)

    for epoch in range(num_epochs):
        total_loss = 0
        encoder.train()
        decoder.train()
        for batch_idx, (in_seq, out_seq) in enumerate(loader):
            this_batch_size = in_seq.shape[0]
            # in_seq, out_seq shape: [batch_size, max_len], dtype = int64
            in_seq, out_seq = in_seq.to(device), out_seq.to(device)

            # enc_outputs of shape (seq_len, batch, num_directions * hidden_size)
            # enc_hidden of shape (num_layers * num_directions, batch, hidden_size)
            enc_outputs, enc_hidden = encoder(
                in_seq, encoder.init_hidden(this_batch_size, device=device))

            # The decoder's input at the first time step is BOS
            # dec_input: [batch_size, 1]
            dec_input = decoder.init_input(this_batch_size, device=device)
            # Initialize the hidden state of the decoder
            # dec_hidden: [num_layers, batch_size, hidden_size]
            dec_hidden = decoder.init_hidden(enc_hidden)

            # mask: [batch_size]
            mask = torch.ones(this_batch_size, device=device)
            eos = torch.LongTensor([2] * this_batch_size).to(device)
            pad = torch.zeros(this_batch_size).to(device)
            num_not_pad_tokens = 0
            loss = 0
            for y in torch.transpose(out_seq, 0, 1):
                dec_output, dec_hidden = decoder(dec_input, dec_hidden, enc_outputs)
                loss += torch.sum((criteon(dec_output, y) * mask), dim=0)
                # Teacher forcing; y: [batch_size] => [batch_size, 1]
                dec_input = torch.unsqueeze(y, dim=1)
                num_not_pad_tokens += torch.sum(mask, dim=0)
                # Once EOS is reached, the remaining tokens are all PAD, so the
                # mask at those positions is set to 0
                mask = torch.where(y != eos, mask, pad)
            loss /= num_not_pad_tokens

            enc_optimizer.zero_grad()
            dec_optimizer.zero_grad()
            loss.backward()
            enc_optimizer.step()
            dec_optimizer.step()
            # Accumulate a detached scalar so batch computation graphs are not kept alive.
            total_loss += loss.item()

        decoder.eval()
        encoder.eval()
        print(f"epoch {epoch + 1}, loss = {total_loss / len(data_set)}")
        if epoch % 10 == 0:
            translate(data_set, random_sample_sentences, sample_in_indices,
                      encoder, decoder, device)

    translate(data_set, random_sample_sentences, sample_in_indices,
              encoder, decoder, device)
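# Standalone illustration of the masking rule used in the loss loop above:
# once a sequence emits EOS (index 2 in that script), its mask entry is
# forced to 0 so later PAD positions no longer contribute to the loss.
# The tensors below are toy values, not data from the training script.
mask = torch.ones(3)                  # three sequences in the batch
eos = torch.LongTensor([2, 2, 2])
pad = torch.zeros(3)
y = torch.LongTensor([5, 2, 7])       # the second sequence just produced EOS
mask = torch.where(y != eos, mask, pad)
print(mask)                           # tensor([1., 0., 1.])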