Example 1
import torch
import torch.nn as nn

# Transformer, ScheduledOptim, and constants are project-local modules
def init_training(args):
    """ Initialize the training setup: vocabulary, model, loss, and optimizer """

    # load vocabulary
    vocab = torch.load(args.vocab)

    # build model
    transformer = Transformer(args, vocab)

    # optionally warm-start from a fine-tuning checkpoint
    if args.finetune:
        print("[Finetune] %s" % args.finetune_model_path)
        transformer.load_state_dict(torch.load(args.finetune_model_path))

    # zero the loss weight for the padding token so it is ignored
    vocab_mask = torch.ones(len(vocab.tgt))
    vocab_mask[vocab.tgt[constants.PAD_WORD]] = 0

    # summed cross-entropy; reduction='sum' replaces the deprecated size_average=False
    cross_entropy_loss = nn.CrossEntropyLoss(weight=vocab_mask,
                                             reduction='sum')

    if args.cuda:
        transformer = transformer.cuda()
        cross_entropy_loss = cross_entropy_loss.cuda()

    if args.optimizer == "Warmup_Adam":
        optimizer = ScheduledOptim(
            torch.optim.Adam(transformer.get_trainable_parameters(),
                             betas=(0.9, 0.98),
                             eps=1e-09), args.d_model, args.n_warmup_steps)

    if args.optimizer == "Adam":
        optimizer = torch.optim.Adam(
            params=transformer.get_trainable_parameters(),
            lr=args.lr,
            betas=(0.9, 0.98),
            eps=1e-8)

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(
            params=transformer.get_trainable_parameters(), lr=args.lr)

    # wrap the model for multi-GPU data parallelism
    if torch.cuda.device_count() > 1:
        print("[Multi GPU] using", torch.cuda.device_count(), "GPUs\n")
        transformer = nn.DataParallel(transformer)

    return vocab, transformer, optimizer, cross_entropy_loss
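
The "Warmup_Adam" branch wraps Adam in a project-local ScheduledOptim. As a minimal sketch only, assuming the wrapper implements the standard inverse-square-root warmup schedule from "Attention Is All You Need" (lr = d_model^-0.5 * min(step^-0.5, step * n_warmup_steps^-1.5)), it might look like this; the project's real class may differ:

class ScheduledOptim:
    """Sketch: wraps an optimizer with the Transformer warmup LR schedule."""

    def __init__(self, optimizer, d_model, n_warmup_steps):
        self.optimizer = optimizer
        self.d_model = d_model
        self.n_warmup_steps = n_warmup_steps
        self.n_steps = 0

    def step(self):
        # grow the lr linearly during warmup, then decay with step^-0.5
        self.n_steps += 1
        lr = (self.d_model ** -0.5) * min(self.n_steps ** -0.5,
                                          self.n_steps * self.n_warmup_steps ** -1.5)
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()

With a wrapper like this, the training loop calls optimizer.step() and optimizer.zero_grad() exactly as it would on a bare torch optimizer.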
Example 2
                           corpus["train"]["tgt_indexs"],
                           corpus["train"]["tgt_texts"],
                           batch_size=args.batch_size,
                           cuda=use_cuda)

validation_data = DataLoader(corpus["valid"]["src_texts"],
                             corpus["valid"]["src_turn"],
                             corpus["valid"]["tgt_indexs"],
                             corpus["valid"]["tgt_texts"],
                             batch_size=args.batch_size,
                             cuda=use_cuda)

model = Transformer(args)

criterion = CrossEntropy()
optimizer = torch.optim.Adam(model.get_trainable_parameters(),
                             lr=args.learning_rate)

if use_cuda:
    model = model.cuda()
    criterion = criterion.cuda()


def get_performance(crit, distributes, gold):
    """Compute the loss and token-level accuracy counts, ignoring PAD."""
    loss = crit(distributes, gold)
    _, predict = distributes.max(dim=-1)
    n_correct = predict.eq(gold)
    n_correct = n_correct.data.masked_select(gold.ne(const.PAD)).sum()

    # number of non-padding gold tokens
    n_gold = gold.ne(const.PAD).sum()
    return loss, n_correct, n_gold
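
To illustrate how the PAD masking in get_performance behaves, the following self-contained toy run repeats its computation inline. PAD = 0, the 4x5 random logits, and the use of nn.CrossEntropyLoss here are assumptions for the demo, standing in for const.PAD, real model output, and the project's CrossEntropy class.

import torch
import torch.nn as nn

PAD = 0  # stand-in for const.PAD

crit = nn.CrossEntropyLoss(ignore_index=PAD, reduction='sum')

# fake logits for 4 tokens over a 5-word vocabulary; gold labels include one PAD
distributes = torch.randn(4, 5)
gold = torch.tensor([2, 3, 1, PAD])

loss = crit(distributes, gold)
_, predict = distributes.max(dim=-1)
n_correct = predict.eq(gold).masked_select(gold.ne(PAD)).sum()
n_gold = gold.ne(PAD).sum()
print(loss.item(), n_correct.item(), n_gold.item())  # loss, correct tokens, non-PAD tokens

Only the three non-PAD positions contribute to both the loss and the accuracy counts, which is exactly what the masked_select over gold.ne(PAD) achieves in get_performance.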