Example no. 1
def train(model, iterator, optimizer, criterion, clip):
    model.train()

    epoch_loss = 0

    for batch in iterator:
        src = batch[0]
        trg = batch[1]

        src = src.type(torch.LongTensor).to(device)
        trg = trg.type(torch.LongTensor).to(device)

        optimizer.zero_grad()

        output, _ = model(src, trg[:, :-1])

        # output = [batch size, trg len - 1, output dim]
        # trg = [batch size, trg len]

        output_dim = output.shape[-1]

        output = output.contiguous().view(-1, output_dim)
        trg = trg[:, 1:].contiguous().view(-1)

        # output = [batch size * (trg len - 1), output dim]
        # trg = [batch size * (trg len - 1)]

        loss = criterion(output, trg)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        print(loss.item())

        # Deleting Used Tensors
        # del src
        # del trg
        # gc.collect()

        epoch_loss += loss.item()

    return epoch_loss / len(iterator)
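
This function assumes that torch is imported, that device is defined at module level, and that the iterator yields (src, trg) tensor pairs. A minimal driver loop is sketched below; N_EPOCHS, CLIP, and train_iterator are illustrative names and are not part of the original example.

# Illustrative driver loop; N_EPOCHS, CLIP and train_iterator are assumed names.
N_EPOCHS = 10
CLIP = 1.0

for epoch in range(N_EPOCHS):
    # train() returns the average loss over the whole iterator for this epoch
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    print(f'Epoch {epoch + 1:02}: train loss = {train_loss:.3f}')
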
Example no. 2
def train(model, iterator, optimizer, criterion, clip):
    model.train()

    epoch_loss = 0

    for i, batch in enumerate(iterator):
        src = batch.src
        trg = batch.trg

        optimizer.zero_grad()

        output, _ = model(src, trg[:, :-1])

        # output = [batch size, trg len - 1, output dim]
        # trg = [batch size, trg len]

        output_dim = output.shape[-1]

        output = output.contiguous().view(-1, output_dim)
        trg = trg[:, 1:].contiguous().view(-1)

        # output = [batch size * (trg len - 1), output dim]
        # trg = [batch size * (trg len - 1)]

        loss = criterion(output, trg)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        print(loss.item())

        epoch_loss += loss.item()

    return epoch_loss / len(iterator)
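
This variant reads batch.src and batch.trg, the field-style batches produced by torchtext-style iterators. As a hedged sketch (not part of the original example), an evaluation pass would typically mirror the same loop with model.eval() and torch.no_grad(), and without the backward and optimizer steps:

# Hedged sketch of a matching evaluation loop (not from the original source).
def evaluate(model, iterator, criterion):
    model.eval()

    epoch_loss = 0

    with torch.no_grad():
        for batch in iterator:
            src = batch.src
            trg = batch.trg

            output, _ = model(src, trg[:, :-1])

            output_dim = output.shape[-1]
            output = output.contiguous().view(-1, output_dim)
            trg = trg[:, 1:].contiguous().view(-1)

            loss = criterion(output, trg)
            epoch_loss += loss.item()

    return epoch_loss / len(iterator)
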
Example no. 3
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
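
Typical usage of this helper, plus a hypothetical per-parameter breakdown (the loop below is an illustration, not part of the original example):

# Report the total number of trainable parameters.
print(f'The model has {count_parameters(model):,} trainable parameters')

# Hypothetical per-parameter breakdown for inspection (not from the original source).
for name, p in model.named_parameters():
    if p.requires_grad:
        print(f'{name}: {p.numel():,}')
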
Example no. 4
enc = model.Encoder(INPUT_DIM, EMB_DIM, HID_DIM, ENC_LAYERS, ENC_KERNEL_SIZE,
                    ENC_DROPOUT, device)
dec = model.Decoder(OUTPUT_DIM, EMB_DIM, HID_DIM, DEC_LAYERS, DEC_KERNEL_SIZE,
                    DEC_DROPOUT, TRG_PAD_IDX, device)

model = model.CNNSeq2Seq(enc, dec).to(device)


# For checking the number of trainable parameters in the model
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


# print(f'The model has {count_parameters(model):,} trainable parameters')

# Setting up Optimizer and Loss function for Training
optimizer = optim.Adam(model.parameters(), lr=config.Learning_Rate)
# scheduler = StepLR(optimizer, step_size=3, gamma=0.1)
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

# Comment out exit(0) only when you want to train the model
# exit(0)


# Defining Training Step
def train(model, iterator, optimizer, criterion, clip):
    model.train()

    epoch_loss = 0

    for batch in iterator:
        src = batch[0]
Example no. 5
enc = model.Encoder(INPUT_DIM, EMB_DIM, HID_DIM, ENC_LAYERS, ENC_KERNEL_SIZE,
                    ENC_DROPOUT, device)
dec = model.Decoder(OUTPUT_DIM, EMB_DIM, HID_DIM, DEC_LAYERS, DEC_KERNEL_SIZE,
                    DEC_DROPOUT, TRG_PAD_IDX, device)

model = model.CNNSeq2Seq(enc, dec).to(device)


# For checking the number of trainable parameters in the model
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


# print(f'The model has {count_parameters(model):,} trainable parameters')

optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)


def train(model, iterator, optimizer, criterion, clip):
    model.train()

    epoch_loss = 0

    for i, batch in enumerate(iterator):
        src = batch.src
        trg = batch.trg

        optimizer.zero_grad()

        output, _ = model(src, trg[:, :-1])