Example #1
    logger.info("Load NLG model from path %s" % (opt.read_model_path))

##### Training and Decoding #####
if not opt.testing:
    nsentences = max(len(labeled_dataset), len(unlabeled_dataset))
    slot_loss_function = set_celoss_function(
        ignore_index=slu_vocab.slot2id[PAD])
    intent_loss_function = set_celoss_function()
    num_training_steps = (
        (nsentences + opt.batchSize - 1) // opt.batchSize) * opt.max_epoch
    num_warmup_steps = int(num_training_steps * opt.warmup_ratio)
    slu_optimizer, slu_scheduler = set_optimizer(
        train_model.slu_model,
        lr=slu_params['lr'],
        l2=slu_params['l2'],
        max_norm=slu_params['max_norm'],
        layerwise_decay=slu_params['layerwise_decay'],
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
        lr_schedule=opt.lr_schedule)
    surface_loss_function = set_celoss_function(
        ignore_index=nlg_vocab.word2id[PAD])
    slot_control_function = set_scloss_function(
        slot_weight=nlg_params['slot_weight'])
    nlg_optimizer, nlg_scheduler = set_optimizer(
        train_model.nlg_model,
        lr=nlg_params['lr'],
        l2=nlg_params['l2'],
        max_norm=nlg_params['max_norm'],
        lr_schedule='constant')
    logger.info("Training starts at %s" %
Example #2
##### Model Initialization #####
if not opt.testing:
    word2id = vocab.sfm2id if params['surface_level'] else vocab.word2id
    ratio = load_embeddings(opt.dataset, train_model.word_embed, word2id, device)
    logger.info("%.2f%% word embeddings from pretrained vectors" % (ratio * 100))
else:
    model_path = os.path.join(opt.read_model_path, 'model.pkl')
    ckpt = torch.load(open(model_path, 'rb'), map_location=device)
    train_model.load_state_dict(ckpt)
    logger.info("Load model from path %s" % (model_path))

##### Training and Decoding #####
if not opt.testing:
    word2id = vocab.sfm2id if params['surface_level'] else vocab.word2id
    loss_function = set_celoss_function(ignore_index=word2id[PAD])
    optimizer, scheduler = set_optimizer(train_model, lr=opt.lr, l2=opt.l2, max_norm=opt.max_norm, lr_schedule='constant')
    logger.info("Training starts at %s" % (time.asctime(time.localtime(time.time()))))
    train_data_index = np.arange(len(train_dataset))
    nsentences = len(train_data_index)
    best_result = {"losses": [], "iter": 0, "dev_ppl": float('inf'), "test_ppl": float('inf'),}
    for i in range(opt.max_epoch):
        start_time = time.time()
        np.random.shuffle(train_data_index)
        losses = []
        train_model.train()
        for j in range(0, nsentences, opt.batchSize):
            optimizer.zero_grad()
            inputs, lens, _ = get_minibatch(train_dataset, vocab, task=task, data_index=train_data_index, index=j,
                batch_size=opt.batchSize, device=device, surface_level=params['surface_level'])
            batch_scores = train_model(inputs, lens)
            batch_loss = loss_function(batch_scores, inputs[:, 1:])
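
The loop above implements a next-token objective: the targets are the input ids shifted by one position (inputs[:, 1:]). A standalone sketch with assumed toy shapes; the real loss_function may flatten the score tensor itself:

import torch
import torch.nn as nn

vocab_size, pad_id = 100, 0                      # assumed toy sizes
loss_fn = nn.CrossEntropyLoss(ignore_index=pad_id)
inputs = torch.randint(1, vocab_size, (4, 12))   # (batch, seq_len) token ids
scores = torch.randn(4, 11, vocab_size, requires_grad=True)  # scores for positions 1..seq_len-1
loss = loss_fn(scores.reshape(-1, vocab_size), inputs[:, 1:].reshape(-1))
loss.backward()                                  # in the real code, gradients flow into train_model
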
Example #3
    model_path = os.path.join(opt.read_model_path, 'model.pkl')
    ckpt = torch.load(open(model_path, 'rb'), map_location=device)
    train_model.load_state_dict(ckpt)
    logger.info("Load model from path %s" % (model_path))

##### Training and Decoding #####
if not opt.testing:
    slot_loss_function = set_celoss_function(ignore_index=vocab.slot2id[PAD])
    intent_loss_function = set_celoss_function()
    num_training_steps = ((len(train_dataset) + opt.batchSize - 1) //
                          opt.batchSize) * opt.max_epoch
    num_warmup_steps = int(num_training_steps * opt.warmup_ratio)
    optimizer, scheduler = set_optimizer(train_model,
                                         lr=opt.lr,
                                         l2=opt.l2,
                                         max_norm=opt.max_norm,
                                         layerwise_decay=opt.layerwise_decay,
                                         num_warmup_steps=num_warmup_steps,
                                         num_training_steps=num_training_steps,
                                         lr_schedule=opt.lr_schedule)
    logger.info("Training starts at %s" %
                (time.asctime(time.localtime(time.time()))))
    train_data_index = np.arange(len(train_dataset))
    nsentences, coefficient = len(train_data_index), 0.5
    best_result = {
        "losses": [],
        "iter": 0,
        "dev_slot": 0.,
        "dev_intent": 0.,
        "test_slot": 0.,
        "test_intent": 0.,
    }
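
set_optimizer is likewise only called in these examples, never shown. A simplified sketch, assuming AdamW plus a linear warmup schedule from Hugging Face transformers; the real helper also handles max_norm and layerwise_decay, which are absorbed but ignored here:

import torch
from transformers import get_constant_schedule, get_linear_schedule_with_warmup

def set_optimizer(model, lr=1e-3, l2=0.0, num_warmup_steps=0,
                  num_training_steps=0, lr_schedule='constant', **kwargs):
    # Sketch: l2 maps to AdamW's weight_decay; 'constant' keeps the learning rate
    # fixed, anything else warms up linearly and then decays toward zero.
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=l2)
    if lr_schedule == 'constant':
        scheduler = get_constant_schedule(optimizer)
    else:
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps, num_training_steps)
    return optimizer, scheduler
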
Example #4
    all_hyps = []
    with torch.no_grad():
        for i in range(0, len(dataset), args.batch_size):
            current_batch = Batch.from_example_list(dataset[i: i + args.batch_size], device, train=False)
            hyps = model.parse(current_batch, args.beam_size)
            all_hyps.extend(hyps)
        acc = evaluator.acc(all_hyps, dataset, output_path, acc_type=acc_type, etype='match', use_checker=use_checker)
    torch.cuda.empty_cache()
    gc.collect()
    return acc

if not args.testing:
    num_training_steps = ((len(train_dataset) + args.batch_size - 1) // args.batch_size) * args.max_epoch
    num_warmup_steps = int(num_training_steps * args.warmup_ratio)
    logger.info('Total training steps: %d;\t Warmup steps: %d' % (num_training_steps, num_warmup_steps))
    optimizer, scheduler = set_optimizer(model, args, num_warmup_steps, num_training_steps)
    start_epoch, nsamples, best_result = 0, len(train_dataset), {'dev_acc': 0.}
    train_index, step_size = np.arange(nsamples), args.batch_size // args.grad_accumulate
    if args.read_model_path and args.load_optimizer:
        optimizer.load_state_dict(check_point['optim'])
        scheduler.load_state_dict(check_point['scheduler'])
        start_epoch = check_point['epoch'] + 1
    logger.info('Start training ......')
    for i in range(start_epoch, args.max_epoch):
        start_time = time.time()
        epoch_loss, epoch_gp_loss, count = 0, 0, 0
        np.random.shuffle(train_index)
        model.train()
        for j in range(0, nsamples, step_size):
            count += 1
            cur_dataset = [train_dataset[k] for k in train_index[j: j + step_size]]
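
Example #4 stops inside the inner loop, but step_size = args.batch_size // args.grad_accumulate signals gradient accumulation: each micro-batch contributes a scaled loss, and the optimizer steps once per grad_accumulate micro-batches. A generic standalone sketch of that pattern, not the original code:

import torch

def accumulate_step(model, optimizer, micro_batches, grad_accumulate, max_norm):
    # Sketch: one optimizer update built from `grad_accumulate` micro-batches.
    optimizer.zero_grad()
    total = 0.0
    for inputs, targets in micro_batches:                 # len(micro_batches) == grad_accumulate
        loss = torch.nn.functional.mse_loss(model(inputs), targets)
        (loss / grad_accumulate).backward()               # scale so accumulated gradients average out
        total += loss.item()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    optimizer.step()
    return total / grad_accumulate
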