Example #1
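  # Seed CUDA and force deterministic cuDNN kernels so runs are reproducible.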
  torch.cuda.manual_seed(const.SEED)
  torch.backends.cudnn.deterministic = True

  model = Transformer(
    source_vocab_size=SOURCE_VOCAB_SIZE,
    target_vocab_size=TARGET_VOCAB_SIZE,
    source_padding_index=SRC_PAD_IDX,
    target_padding_index=TRG_PAD_IDX,
    embedding_size=const.EMBEDDING_SIZE,
    number_of_layers=const.NUMBER_OF_LAYERS,
    number_of_heads=const.NUMBER_OF_HEADS,
    forward_expansion=const.FORWARD_EXPANSION,
    device=device,
  ).to(device)

  model.apply(model_utils.initialize_weights)
  optimizer = torch.optim.Adam(model.parameters(), lr=const.LEARNING_RATE)
  cross_entropy = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

  print(f'The model has {model_utils.count_parameters(model):,} trainable parameters')

  trainer = Trainer(
    const=const,
    optimizer=optimizer,
    criterion=cross_entropy,
    device=device,
  )

  trainer.train(
    model=model,
    train_iterator=train_iterator,
  )
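
This snippet leans on two helpers from a project-local model_utils module that the excerpt never shows. A minimal sketch of what they might look like, assuming Xavier-uniform initialization (the original module's scheme may differ):

import torch.nn as nn

def initialize_weights(module):
    # Xavier-initialize every weight tensor with 2+ dimensions;
    # biases and other 1-D tensors keep their defaults.
    if hasattr(module, 'weight') and module.weight is not None and module.weight.dim() > 1:
        nn.init.xavier_uniform_(module.weight.data)

def count_parameters(model):
    # Count the elements of every parameter that requires gradients.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)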
Example #2

import torch
from torch import nn
from torch.optim import Adam

model = Transformer(src_pad_idx=src_pad_idx,
                    trg_pad_idx=trg_pad_idx,
                    trg_sos_idx=trg_sos_idx,
                    d_model=d_model,
                    enc_voc_size=enc_voc_size,
                    dec_voc_size=dec_voc_size,
                    max_len=max_len,
                    ffn_hidden=ffn_hidden,
                    n_head=n_head,
                    n_layers=n_layers,
                    drop_prob=drop_prob,
                    device=device).to(device)

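# nn.Module.apply runs the initialization hook on every submodule recursively.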
model.apply(initialize_weights)
optimizer = Adam(params=model.parameters(),
                 lr=init_lr,
                 weight_decay=weight_decay,
                 eps=adam_eps)

# The loss scores predicted target tokens, so padding in the target
# sequence is what should be ignored here.
criterion = nn.CrossEntropyLoss(ignore_index=trg_pad_idx)


def train(model, iterator, optimizer, criterion, clip):
    model.train()
    epoch_loss = 0
    for i, batch in enumerate(iterator):
        src = batch.src
        trg = batch.trg
        # Assumed standard teacher-forcing step: decode from the target
        # shifted right, score against the target shifted left.
        optimizer.zero_grad()
        output = model(src, trg[:, :-1])
        loss = criterion(output.contiguous().view(-1, output.shape[-1]),
                         trg[:, 1:].contiguous().view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)
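
Neither excerpt shows the loop that drives training across epochs. A hedged sketch of how this train function is usually called, where n_epochs, train_iter, and the checkpoint path are illustrative names, not from the original:

best_loss = float('inf')
for epoch in range(n_epochs):
    train_loss = train(model, train_iter, optimizer, criterion, clip)
    print(f'Epoch {epoch + 1:02} | train loss {train_loss:.3f}')
    # Keep the best checkpoint seen so far.
    if train_loss < best_loss:
        best_loss = train_loss
        torch.save(model.state_dict(), 'transformer-best.pt')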