Example #1
import time

# encoder, decoder, google_train_vectorize, N and the train/val data splits are
# assumed to be defined earlier in the original script.
graph_train, graph_val = [], []
best_val_loss = 100.0
best_bleu_score = 0.0
best_val_acc = 0.0
n_epochs = 200
train_remainder1 = len(train_data1) % N
val_remainder1 = len(val_data1) % N

for epoch in range(n_epochs):
    start_time = time.time()
    train_loss, train_acc = 0.0, 0.0
    val_loss, val_acc = 0.0, 0.0
    correct = 0
    total_loss = 0
    total_val_len = 0
    en_hidden = encoder.init_hidden()
    de_hidden = decoder.init_hidden()

    encoder.train()
    decoder.train()
    for batch in range(0, len(train_data1) - train_remainder1, N):
        loss = 0

        data_raw1 = train_data1[batch:batch + N]
        data_raw2 = train_data2[batch:batch + N]
        target_raw1 = train_target1[batch:batch + N]
        target_raw2 = train_target2[batch:batch + N]

        ### lang 1

        data1, target1, _seq_len_x1, seq_len_y1 = google_train_vectorize(
Example #2
import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from sklearn.metrics import balanced_accuracy_score, f1_score


# Encoder, KeplerDataset and the config dict are assumed to be defined elsewhere in the project.
class KeplerModel(pl.LightningModule):
    def __init__(self):
        super(KeplerModel, self).__init__()

        #Initialize Model Parameters Using Config Properties
        self.model = Encoder(config['seq_length'], config['hidden_size'],
                             config['output_dim'], config['n_layers'])

        #Initialize a Cross Entropy Loss Criterion for Training
        self.criterion = torch.nn.CrossEntropyLoss()

    #Define a Forward Pass of the Model
    def forward(self, x, h):
        return self.model(x, h)

    def training_step(self, batch, batch_idx):

        #Set Model to Training Mode
        self.model.train()

        #Unpack Data and Labels from Batch
        x, y = batch

        #Reshape Data into Shape (batch_size, 1, seq_length)
        x = x.view(x.size(0), -1, x.size(1))

        #Initialize the hidden state for the forward pass
        h = self.model.init_hidden(x.size(0))

        #Zero out the model gradients to avoid accumulation
        self.model.zero_grad()

        #Forward Pass Through Model
        out, h = self.forward(x, h)

        #Calculate Cross Entropy Loss
        loss = self.criterion(out, y.long().squeeze())

        #Obtain Class Labels
        y_hat = torch.max(out, 1)[1]

        #Compute the balanced accuracy (weights based on number of ex. in each class)
        accuracy = balanced_accuracy_score(y, y_hat)

        #Compute weighted f1 score to account for class imbalance
        f1 = f1_score(y, y_hat, average='weighted')

        #Create metric object for tensorboard logging
        tensorboard_logs = {
            'train_loss': loss.item(),
            'accuracy': accuracy,
            'f1': f1
        }

        return {'loss': loss, 'log': tensorboard_logs}

    def validation_step(self, batch, batch_idx):

        #Set Model to Eval Mode
        self.model.eval()

        #Unpack data and labels from batch
        x, y = batch

        #Initialize Hidden State
        h = self.model.init_hidden(x.size(0))

        #Reshape Data into Shape (batch_size, 1, seq_length)
        x = x.view(x.size(0), -1, x.size(1))

        #Calculate Forward Pass of The Model
        out, h = self.forward(x, h)

        #Calculate Cross Entropy Loss
        loss = self.criterion(out, y.long().squeeze())

        #Calculate Class Indices
        y_hat = torch.max(out, 1)[1]

        #Calculate Balanced Accuracy
        val_accuracy = torch.Tensor([balanced_accuracy_score(y, y_hat)])

        #Calculate Weighted F1 Score
        val_f1 = torch.Tensor([f1_score(y, y_hat, average='weighted')])

        #Create a metrics object
        metrics = {
            'val_loss': loss,
            'val_accuracy': val_accuracy,
            'val_f1': val_f1
        }

        return metrics

    def validation_end(self, outputs):
        # OPTIONAL
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['val_accuracy'] for x in outputs]).mean()
        avg_f1 = torch.stack([x['val_f1'] for x in outputs]).mean()

        tensorboard_logs = {
            'val_loss': avg_loss,
            'val_acc': avg_acc,
            'val_f1': avg_f1
        }

        return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.001)

    @pl.data_loader
    def train_dataloader(self):
        # REQUIRED
        return DataLoader(KeplerDataset(mode="train"),
                          batch_size=64,
                          shuffle=True)

    @pl.data_loader
    def val_dataloader(self):
        # REQUIRED
        return DataLoader(KeplerDataset(mode="test"),
                          batch_size=128,
                          shuffle=True)
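
The LightningModule above bundles its dataloaders, optimizer and metrics, so training it only requires handing it to a Trainer. A minimal sketch (not part of the original example; the Trainer arguments are left at defaults because the original uses an older pytorch-lightning API with validation_end and @pl.data_loader):

import pytorch_lightning as pl

model = KeplerModel()
trainer = pl.Trainer()   # epoch limits, GPU flags, logging dirs, etc. would be passed here
trainer.fit(model)       # runs training_step / validation_step and aggregates via validation_end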
Example #3
import random
from itertools import chain

import torch
from torch import nn, optim
from torch.autograd import Variable

# Q (the inference/encoder network), Generator, Discriminator, the helper functions
# used below (variables_from_pair, reset_grad, to_onehot) and the hyperparameters
# (batch, hidden_size, input_size, use_cuda, input_lang, output_lang, pairs) are
# assumed to be defined earlier in the original script.
P = Generator(hidden_size, hidden_size, input_size, use_cuda=use_cuda)
D = Discriminator(input_size, hidden_size, 64, use_cuda=use_cuda)

G_optimizer = optim.Adam(chain(Q.parameters(), P.parameters()),
                         lr=5e-5,
                         betas=(0.5, 0.999))
D_optimizer = optim.Adam(D.parameters(), lr=5e-5, betas=(0.5, 0.999))

criterion = nn.CrossEntropyLoss()

training_pairs = [
    variables_from_pair(input_lang, output_lang, random.choice(pairs))
    for i in range(int(1e5))
]

h_Q = Q.init_hidden(batch)
h_P = P.init_hidden(batch)
h_D_enc = D.init_hidden(batch)
h_D_gen = D.init_hidden(batch)

for epoch in range(int(1e5)):

    x = training_pairs[epoch][0]
    x = x.unsqueeze(0).cpu()

    reset_grad([Q, P, D])

    x = to_onehot(x.squeeze()).unsqueeze(0)

    z = Variable(torch.zeros(batch, x.size(1), hidden_size))
    E_x, h_E = Q(x.cpu(), h_Q.cpu())
Example #4
import torch
from torch import nn, optim
from torch.utils.data import DataLoader


# F2EDataSet, Encoder, Decoder, translate and the hyperparameters used below
# (max_seq_len, batch_size, embed_size, hidden_size, num_layers, attention_size,
# drop_prob, lr, random_sample_k, num_epochs, device) are assumed to be defined
# elsewhere in the original script.
def main():
    data_set = F2EDataSet(max_length=max_seq_len)
    loader = DataLoader(data_set, batch_size=batch_size, shuffle=True)
    encoder = Encoder(data_set.in_lang.token_n,
                      embed_size=embed_size,
                      hidden_size=hidden_size,
                      num_layers=num_layers,
                      drop_prob=drop_prob).to(device)
    decoder = Decoder(vocab_size=data_set.out_lang.token_n,
                      embed_size=embed_size,
                      hidden_size=hidden_size,
                      num_layers=num_layers,
                      attention_size=attention_size,
                      drop_prob=drop_prob).to(device)
    enc_optimizer = optim.Adam(encoder.parameters(), lr=lr)
    dec_optimizer = optim.Adam(decoder.parameters(), lr=lr)
    criteon = nn.CrossEntropyLoss(reduction='none').to(device)
    random_sample_sentences = data_set.random_sample(k=random_sample_k)
    sample_in_indices = []
    for in_sentence, out_sentence in random_sample_sentences:
        sample_in_indices.append(
            data_set.convert_token_to_index(data_set.in_lang, in_sentence))
    # sample_in_indices: shape[random_sample_k, max_len], dtype: int64
    sample_in_indices = torch.LongTensor(sample_in_indices).to(device)
    # sample_in_indices: [random_sample_k, 1, max_len]
    sample_in_indices = torch.unsqueeze(sample_in_indices, dim=1)
    for epoch in range(num_epochs):
        total_loss = 0
        encoder.train()
        decoder.train()
        for batch_idx, (in_seq, out_seq) in enumerate(loader):
            this_batch_size = in_seq.shape[0]
            # in_seq, out_seq shape: [batch_size, max_len], dtype = int64
            in_seq, out_seq = in_seq.to(device), out_seq.to(device)
            # enc_outputs of shape (seq_len, batch, num_directions * hidden_size)
            # enc_hidden of shape (num_layers * num_directions, batch, hidden_size)
            enc_outputs, enc_hidden = encoder(
                in_seq, encoder.init_hidden(this_batch_size, device=device))
            # the decoder's input at the first time step is BOS
            # dec_input: [batch_size, 1]
            dec_input = decoder.init_input(this_batch_size, device=device)
            # initialize hidden state of decoder
            # dec_hidden: [num_layers, batch_size, hidden_size]
            dec_hidden = decoder.init_hidden(enc_hidden)
            # mask [batch_size]
            mask = torch.ones(this_batch_size, device=device)
            eos = torch.LongTensor([2] * this_batch_size).to(device)
            pad = torch.zeros(this_batch_size).to(device)
            num_not_pad_tokens = 0
            loss = 0
            for y in torch.transpose(out_seq, 0, 1):
                dec_output, dec_hidden = decoder(dec_input, dec_hidden,
                                                 enc_outputs)
                loss += torch.sum((criteon(dec_output, y) * mask), dim=0)
                # y: [batch_size] => [batch_size, 1]
                dec_input = torch.unsqueeze(y, dim=1)
                num_not_pad_tokens += torch.sum(mask, dim=0)
                # once EOS is reached, the remaining tokens are all PAD; set the mask at those positions to 0
                mask = torch.where(y != eos, mask, pad)
            loss /= num_not_pad_tokens
            total_loss += loss.item()  # accumulate as a Python float so graphs are not retained
            enc_optimizer.zero_grad()
            dec_optimizer.zero_grad()
            loss.backward()
            enc_optimizer.step()
            dec_optimizer.step()
        decoder.eval()
        encoder.eval()
        print(f"epoch {epoch + 1}, loss = {total_loss / len(data_set)}")
        if epoch % 10 == 0:
            translate(data_set, random_sample_sentences, sample_in_indices,
                      encoder, decoder, device)
    translate(data_set, random_sample_sentences, sample_in_indices, encoder,
              decoder, device)
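
The subtle part of the training loop above is the per-time-step masking: once a target sequence reaches EOS (index 2), its remaining positions are PAD and should not contribute to the loss. A small standalone sketch of just that update rule, using made-up toy sequences rather than the original data:

import torch

# Two toy target sequences of length 4; index 2 plays the role of EOS, 0 of PAD.
out_seq = torch.LongTensor([[5, 7, 2, 0],
                            [4, 2, 0, 0]])
batch_size = out_seq.size(0)
mask = torch.ones(batch_size)
eos = torch.LongTensor([2] * batch_size)
pad = torch.zeros(batch_size)

for y in torch.transpose(out_seq, 0, 1):
    # the loss at this time step would be weighted by the current mask
    print(y.tolist(), mask.tolist())
    mask = torch.where(y != eos, mask, pad)

# step 0: tokens [5, 4], mask [1., 1.]  -> both sequences counted
# step 1: tokens [7, 2], mask [1., 1.]  -> the EOS token itself is still counted
# step 2: tokens [2, 0], mask [1., 0.]  -> sequence 2 is past EOS, masked out
# step 3: tokens [0, 0], mask [0., 0.]  -> only PAD remains, nothing counted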