Example #1
def predict(test_data_loader, model, opt):
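    # Wrap the trained model in a SequenceGenerator and decode the test set.
    # Note: beam_size is fixed to 1 here, so decoding is effectively greedy search.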
    if opt.delimiter_type == 0:
        delimiter_word = pykp.io.SEP_WORD
    else:
        delimiter_word = pykp.io.EOS_WORD
        
    generator = SequenceGenerator(model,
                                  bos_idx=opt.word2idx[pykp.io.BOS_WORD],
                                  eos_idx=opt.word2idx[pykp.io.EOS_WORD],
                                  pad_idx=opt.word2idx[pykp.io.PAD_WORD],
                                  peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
                                  beam_size=1,
                                  max_sequence_length=opt.max_length,
                                  copy_attn=opt.copy_attention,
                                  coverage_attn=opt.coverage_attn,
                                  review_attn=opt.review_attn,
                                  cuda=opt.gpuid > -1
                                  )
    """
    if opt.one2many and opt.one2many_mode > 1:
        prediction_by_sampling(generator, test_data_loader, opt, delimiter_word)
    else:
        evaluate_beam_search(generator, test_data_loader, opt, delimiter_word)
    """
    if opt.sampling:
        raise ValueError("Not support yet!")
        #prediction_by_sampling(generator, test_data_loader, opt, delimiter_word)
    else:
        evaluate_beam_search(generator, test_data_loader, opt, delimiter_word)
Example #2
def predict(test_data_loader, model, ntm_model, opt):
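    # Topic-aware variant: the generator also receives the neural topic model
    # (ntm_model) and topic options so decoding can condition on the document's topic representation.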
    if opt.delimiter_type == 0:
        delimiter_word = pykp.io.SEP_WORD
    else:
        delimiter_word = pykp.io.EOS_WORD
    generator = SequenceGenerator(
        model,
        ntm_model,
        opt.use_topic_represent,
        opt.topic_type,
        bos_idx=opt.word2idx[pykp.io.BOS_WORD],
        eos_idx=opt.word2idx[pykp.io.EOS_WORD],
        pad_idx=opt.word2idx[pykp.io.PAD_WORD],
        beam_size=opt.beam_size,
        max_sequence_length=opt.max_length,
        copy_attn=opt.copy_attention,
        coverage_attn=opt.coverage_attn,
        review_attn=opt.review_attn,
        length_penalty_factor=opt.length_penalty_factor,
        coverage_penalty_factor=opt.coverage_penalty_factor,
        length_penalty=opt.length_penalty,
        coverage_penalty=opt.coverage_penalty,
        cuda=opt.gpuid > -1,
        n_best=opt.n_best,
        block_ngram_repeat=opt.block_ngram_repeat,
        ignore_when_blocking=opt.ignore_when_blocking)

    evaluate_beam_search(generator, test_data_loader, opt, delimiter_word)
Example #3
def main():
    # load settings for training
    parser = argparse.ArgumentParser(
        description='predict.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    config.transformer_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    # print(opt.gpuid)
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    opt.exp = 'predict.' + opt.exp
    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'

    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
    else:
        opt.exp += '.uni-directional'

    # fill time into the name
    if opt.exp_path.find('%s') > 0:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.pred_path = opt.pred_path % (opt.exp, opt.timemark)

    if not os.path.exists(opt.exp_path):
        os.makedirs(opt.exp_path)
    if not os.path.exists(opt.pred_path):
        os.makedirs(opt.pred_path)

    logging = config.init_logging(logger_name=None, log_file=opt.exp_path + '/output.log', stdout=True)
    
    
    try:
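        # NOTE: the checkpoint path below is hard-coded; point opt.train_from at your own model file.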
        opt.train_from = 'model/kp20k.ml.copy.bi-directional.20180908-054257/kp20k.ml.copy.bi-directional.epoch=9.batch=2932.model'
        test_data_loader, word2id, id2word, vocab = load_data_vocab(opt, load_train=False)
        model = init_model(opt)

        generator = SequenceGenerator(model,opt,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length,
                                      )
        
        evaluate_beam_search(generator, test_data_loader, opt, title='predict', save_path=opt.pred_path + '/[epoch=%d,batch=%d,total_batch=%d]test_result.csv' % (0, 0, 0))

    except Exception as e:
        logging.exception("message")
Example #4
def evaluate_per_epoch(model, eval_dataloader, opt, epoch):
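    # Decode the evaluation set with beam search for the given epoch and save
    # the predictions under a per-epoch path.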
    generator = SequenceGenerator(
        model,
        opt,
        eos_id=opt.word2id[pykp.io.EOS_WORD],
        beam_size=opt.beam_size,
        max_sequence_length=opt.max_sent_length,
    )

    evaluate_beam_search(generator,
                         eval_dataloader,
                         opt,
                         title='predict',
                         save_path=opt.pred_path + '/epoch=%s' % (epoch))
Example #5
def main():
    opt = config.init_opt(description='predict.py')
    logger = config.init_logging('predict',
                                 opt.exp_path + '/output.log',
                                 redirect_to_stdout=False)

    logger.info('EXP_PATH : ' + opt.exp_path)

    logger.info('Parameters:')
    [
        logger.info('%s    :    %s' % (k, str(v)))
        for k, v in opt.__dict__.items()
    ]

    logger.info(
        '======================  Checking GPU Availability  ========================='
    )
    if torch.cuda.is_available():
        if isinstance(opt.device_ids, int):
            opt.device_ids = [opt.device_ids]
        logger.info('Running on %s! devices=%s' %
                    ('MULTIPLE GPUs' if len(opt.device_ids) > 1 else '1 GPU',
                     str(opt.device_ids)))
    else:
        logger.info('Running on CPU!')

    try:
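        # Load the vocabulary and test sets, restore the model, then decode each
        # test set with beam search.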
        test_data_loaders, word2id, id2word, vocab = load_vocab_and_testsets(
            opt)
        model = init_model(opt)
        generator = SequenceGenerator(model,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length)

        for testset_name, test_data_loader in zip(opt.test_dataset_names,
                                                  test_data_loaders):
            logger.info('Evaluating %s' % testset_name)
            evaluate_beam_search(generator,
                                 test_data_loader,
                                 opt,
                                 title='test_%s' % testset_name,
                                 predict_save_path=opt.pred_path +
                                 '/%s_test_result/' % (testset_name))

    except Exception as e:
        logger.error(e, exc_info=True)
Example #6
def evaluate_per_epoch(model, eval_dataloader, opt):
    generator = SequenceGenerator(
        model,
        opt,
        eos_id=opt.word2id[pykp.io.EOS_WORD],
        beam_size=opt.beam_size,
        max_sequence_length=opt.max_sent_length,
    )
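    # Recover the epoch/batch counters from the checkpoint filename so the
    # predictions are saved under a matching path.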
    model_path = opt.train_from.split('/')[-1]
    _, epoch, batch, total_batch = re.findall(r'\d+', model_path)

    evaluate_beam_search(generator,
                         eval_dataloader,
                         opt,
                         title='predict',
                         save_path=opt.pred_path +
                         '/epoch=%s,batch=%s,total_batch=%s' %
                         (epoch, batch, total_batch))
Example #7
def predict(test_data_loader, model, opt):
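    # Same entry point as Example #1, but with a configurable beam size and
    # additional length/coverage penalty and n-best options.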
    if opt.delimiter_type == 0:
        delimiter_word = pykp.io.SEP_WORD
    else:
        delimiter_word = pykp.io.EOS_WORD
    generator = SequenceGenerator(model,
                                  bos_idx=opt.word2idx[pykp.io.BOS_WORD],
                                  eos_idx=opt.word2idx[pykp.io.EOS_WORD],
                                  pad_idx=opt.word2idx[pykp.io.PAD_WORD],
                                  beam_size=opt.beam_size,
                                  max_sequence_length=opt.max_length,
                                  copy_attn=opt.copy_attention,
                                  coverage_attn=opt.coverage_attn,
                                  review_attn=opt.review_attn,
                                  include_attn_dist=opt.include_attn_dist,
                                  length_penalty_factor=opt.length_penalty_factor,
                                  coverage_penalty_factor=opt.coverage_penalty_factor,
                                  length_penalty=opt.length_penalty,
                                  coverage_penalty=opt.coverage_penalty,
                                  cuda=opt.gpuid > -1,
                                  n_best=opt.n_best,
                                  block_ngram_repeat=opt.block_ngram_repeat,
                                  ignore_when_blocking=opt.ignore_when_blocking,
                                  peos_idx=opt.word2idx[pykp.io.PEOS_WORD]
                                  )
    """
    if opt.one2many and opt.one2many_mode > 1:
        prediction_by_sampling(generator, test_data_loader, opt, delimiter_word)
    else:
        evaluate_beam_search(generator, test_data_loader, opt, delimiter_word)
    """
    if opt.sampling:
        raise ValueError("Not support yet!")
        #prediction_by_sampling(generator, test_data_loader, opt, delimiter_word)
    else:
        evaluate_beam_search(generator, test_data_loader, opt, delimiter_word)
Example #8
def train_model(model, optimizer, criterion, train_data_loader, valid_data_loader, test_data_loader, opt):
    generator = SequenceGenerator(model,
                                  eos_id=opt.word2id[pykp.io.EOS_WORD],
                                  beam_size=opt.beam_size,
                                  max_sequence_length=opt.max_sent_length
                                  )

    logging.info('======================  Checking GPU Availability  =========================')
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on GPU! devices=%s' % str(opt.gpuid))
        # model = nn.DataParallel(model, device_ids=opt.gpuid)
    else:
        logging.info('Running on CPU!')

    logging.info('======================  Start Training  =========================')

    checkpoint_names        = []
    train_history_losses    = []
    valid_history_losses    = []
    test_history_losses     = []
    # best_loss = sys.float_info.max # for normal training/testing loss (likelihood)
    best_loss               = 0.0 # for f-score
    stop_increasing         = 0

    train_losses = []
    total_batch = 0
    early_stop_flag = False

    if opt.train_from:
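        # Resume from the training state file saved alongside the .model checkpoint.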
        state_path = opt.train_from.replace('.model', '.state')
        logging.info('Loading training state from: %s' % state_path)
        if os.path.exists(state_path):
            (epoch, total_batch, best_loss, stop_increasing, checkpoint_names, train_history_losses, valid_history_losses,
                        test_history_losses) = torch.load(open(state_path, 'rb'))
            opt.start_epoch = epoch

    for epoch in range(opt.start_epoch , opt.epochs):
        if early_stop_flag:
            break

        progbar = Progbar(title='Training', target=len(train_data_loader), batch_size=train_data_loader.batch_size,
                          total_examples=len(train_data_loader.dataset))

        for batch_i, batch in enumerate(train_data_loader):
            model.train()
            batch_i += 1 # for the aesthetics of printing
            total_batch += 1
            one2many_batch, one2one_batch = batch
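            # one2one_batch pairs each source with a single target phrase (used for
            # the ML objective); one2many_batch keeps all target phrases per source
            # (used below for the RL reward).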
            src, trg, trg_target, trg_copy_target, src_ext, oov_lists = one2one_batch
            max_oov_number = max([len(oov) for oov in oov_lists])

            print("src size - ",src.size())
            print("target size - ",trg.size())

            if torch.cuda.is_available():
                src = src.cuda()
                trg = trg.cuda()
                trg_target = trg_target.cuda()
                trg_copy_target = trg_copy_target.cuda()
                src_ext = src_ext.cuda()

            optimizer.zero_grad()

            '''
            Training with Maximum Likelihood (word-level error)
            '''
            decoder_log_probs, _, _ = model.forward(src, trg, src_ext, oov_lists)

            # simply average losses of all the predictions
            # IMPORTANT, must use logits instead of probs to compute the loss, otherwise it's super super slow at the beginning (grads of probs are small)!
            start_time = time.time()

            if not opt.copy_model:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(-1, opt.vocab_size),
                    trg_target.contiguous().view(-1)
                )
            else:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(-1, opt.vocab_size + max_oov_number),
                    trg_copy_target.contiguous().view(-1)
                )

            '''
            Training with Reinforcement Learning (instance-level reward f-score)
            '''
            src_list, trg_list, _, trg_copy_target_list, src_oov_map_list, oov_list, src_str_list, trg_str_list = one2many_batch

            if torch.cuda.is_available():
                src_list = src_list.cuda()
                src_oov_map_list = src_oov_map_list.cuda()
            rl_loss = get_loss_rl()

            start_time = time.time()
            ml_loss.backward()
            print("--backward- %s seconds ---" % (time.time() - start_time))

            if opt.max_grad_norm > 0:
                pre_norm = torch.nn.utils.clip_grad_norm(model.parameters(), opt.max_grad_norm)
                after_norm = (sum([p.grad.data.norm(2) ** 2 for p in model.parameters() if p.grad is not None])) ** (1.0 / 2)
                logging.info('clip grad (%f -> %f)' % (pre_norm, after_norm))

            optimizer.step()

            train_losses.append(ml_loss.data[0])

            progbar.update(epoch, batch_i, [('train_loss', ml_loss.data[0]), ('PPL', ml_loss.data[0])])

            if batch_i > 1 and batch_i % opt.report_every == 0:
                logging.info('======================  %d  =========================' % (batch_i))

                logging.info('Epoch : %d Minibatch : %d, Loss=%.5f' % (epoch, batch_i, np.mean(ml_loss.data[0])))
                sampled_size = 2
                logging.info('Printing predictions on %d sampled examples by greedy search' % sampled_size)

                if torch.cuda.is_available():
                    src                 = src.data.cpu().numpy()
                    decoder_log_probs   = decoder_log_probs.data.cpu().numpy()
                    max_words_pred      = decoder_log_probs.argmax(axis=-1)
                    trg_target          = trg_target.data.cpu().numpy()
                    trg_copy_target     = trg_copy_target.data.cpu().numpy()
                else:
                    src                 = src.data.numpy()
                    decoder_log_probs   = decoder_log_probs.data.numpy()
                    max_words_pred      = decoder_log_probs.argmax(axis=-1)
                    trg_target          = trg_target.data.numpy()
                    trg_copy_target     = trg_copy_target.data.numpy()

                sampled_trg_idx     = np.random.random_integers(low=0, high=len(trg) - 1, size=sampled_size)
                src                 = src[sampled_trg_idx]
                oov_lists           = [oov_lists[i] for i in sampled_trg_idx]
                max_words_pred      = [max_words_pred[i] for i in sampled_trg_idx]
                decoder_log_probs   = decoder_log_probs[sampled_trg_idx]
                if not opt.copy_model:
                    trg_target      = [trg_target[i] for i in sampled_trg_idx] # use the real target trg_loss (the starting <BOS> has been removed and contains oov ground-truth)
                else:
                    trg_target      = [trg_copy_target[i] for i in sampled_trg_idx]

                for i, (src_wi, pred_wi, trg_i, oov_i) in enumerate(zip(src, max_words_pred, trg_target, oov_lists)):
                    nll_prob = -np.sum([decoder_log_probs[i][l][pred_wi[l]] for l in range(len(trg_i))])
                    find_copy       = np.any([x >= opt.vocab_size for x in src_wi])
                    has_copy        = np.any([x >= opt.vocab_size for x in trg_i])

                    sentence_source = [opt.id2word[x] if x < opt.vocab_size else oov_i[x-opt.vocab_size] for x in src_wi]
                    sentence_pred   = [opt.id2word[x] if x < opt.vocab_size else oov_i[x-opt.vocab_size] for x in pred_wi]
                    sentence_real   = [opt.id2word[x] if x < opt.vocab_size else oov_i[x-opt.vocab_size] for x in trg_i]

                    sentence_source = sentence_source[:sentence_source.index('<pad>')] if '<pad>' in sentence_source else sentence_source
                    sentence_pred   = sentence_pred[:sentence_pred.index('<pad>')] if '<pad>' in sentence_pred else sentence_pred
                    sentence_real   = sentence_real[:sentence_real.index('<pad>')] if '<pad>' in sentence_real else sentence_real

                    logging.info('==================================================')
                    logging.info('Source: %s '          % (' '.join(sentence_source)))
                    logging.info('\t\tPred : %s (%.4f)' % (' '.join(sentence_pred), nll_prob) + (' [FIND COPY]' if find_copy else ''))
                    logging.info('\t\tReal : %s '       % (' '.join(sentence_real)) + (' [HAS COPY]' + str(trg_i) if has_copy else ''))

            if total_batch > 1 and total_batch % opt.run_valid_every == 0:
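                # Periodically run beam-search evaluation on the validation and test
                # sets, plot the learning curves, and checkpoint when the score improves.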
                logging.info('*' * 50)
                logging.info('Run validation and testing @Epoch=%d,#(Total batch)=%d' % (epoch, total_batch))
                # valid_losses    = _valid_error(valid_data_loader, model, criterion, epoch, opt)
                # valid_history_losses.append(valid_losses)
                valid_score_dict  = evaluate_beam_search(generator, valid_data_loader, opt, title='valid', epoch=epoch, save_path=opt.exp_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))
                test_score_dict   = evaluate_beam_search(generator, test_data_loader, opt, title='test', epoch=epoch, save_path=opt.exp_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))

                checkpoint_names.append('epoch=%d-batch=%d-total_batch=%d' % (epoch, batch_i, total_batch))
                train_history_losses.append(copy.copy(train_losses))
                valid_history_losses.append(valid_score_dict)
                test_history_losses.append(test_score_dict)
                train_losses = []

                scores = [train_history_losses]
                curve_names = ['Training Error']
                scores += [[result_dict[name] for result_dict in valid_history_losses] for name in opt.report_score_names]
                curve_names += ['Valid-'+name for name in opt.report_score_names]
                scores += [[result_dict[name] for result_dict in test_history_losses] for name in opt.report_score_names]
                curve_names += ['Test-'+name for name in opt.report_score_names]

                scores = [np.asarray(s) for s in scores]
                # Plot the learning curve
                plot_learning_curve(scores=scores,
                                    curve_names=curve_names,
                                    checkpoint_names=checkpoint_names,
                                    title='Training Validation & Test',
                                    save_path=opt.exp_path + '/[epoch=%d,batch=%d,total_batch=%d]train_valid_test_curve.png' % (epoch, batch_i, total_batch))

                '''
                decide whether to early-stop training (stop when the f-score no longer increases; earlier versions stopped when the validation error no longer decreased)
                '''
                valid_loss      = np.average(valid_history_losses[-1][opt.report_score_names[0]])
                is_best_loss    = valid_loss > best_loss
                rate_of_change  = float(valid_loss - best_loss) / float(best_loss) if float(best_loss) > 0 else 0.0

                # valid error doesn't increase
                if rate_of_change <= 0:
                    stop_increasing += 1
                else:
                    stop_increasing = 0

                if is_best_loss:
                    logging.info('Validation: update best loss (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        best_loss, valid_loss, rate_of_change * 100))
                else:
                    logging.info('Validation: best loss is not updated for %d times (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        stop_increasing, best_loss, valid_loss, rate_of_change * 100))

                best_loss = max(valid_loss, best_loss)

                # only store the checkpoints that make better validation performances
                if total_batch > 1 and (total_batch % opt.save_model_every == 0 or is_best_loss): #epoch >= opt.start_checkpoint_at and
                    # Save the checkpoint
                    logging.info('Saving checkpoint to: %s' % os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d.error=%f' % (opt.exp, epoch, batch_i, total_batch, valid_loss) + '.model'))
                    torch.save(
                        model.state_dict(),
                        open(os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d' % (opt.exp, epoch, batch_i, total_batch) + '.model'), 'wb')
                    )
                    torch.save(
                        (epoch, total_batch, best_loss, stop_increasing, checkpoint_names, train_history_losses, valid_history_losses, test_history_losses),
                        open(os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d' % (opt.exp, epoch, batch_i, total_batch) + '.state'), 'wb')
                    )

                if stop_increasing >= opt.early_stop_tolerance:
                    logging.info('No improvement for %d validation rounds; stopping training early' % stop_increasing)
                    early_stop_flag = True
                    break
                logging.info('*' * 50)
Example #9
def train_model(model, optimizer_ml, optimizer_rl, criterion, train_data_loader, valid_data_loader, test_data_loader, opt):
    generator = SequenceGenerator(model,
                                  eos_id=opt.word2id[pykp.io.EOS_WORD],
                                  beam_size=opt.beam_size,
                                  max_sequence_length=opt.max_sent_length
                                  )

    logging.info('======================  Checking GPU Availability  =========================')
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on GPU! devices=%s' % str(opt.gpuid))
        # model = nn.DataParallel(model, device_ids=opt.gpuid)
    else:
        logging.info('Running on CPU!')

    logging.info('======================  Start Training  =========================')

    checkpoint_names = []
    train_ml_history_losses = []
    train_rl_history_losses = []
    valid_history_losses = []
    test_history_losses = []
    # best_loss = sys.float_info.max # for normal training/testing loss (likelihood)
    best_loss = 0.0  # for f-score
    stop_increasing = 0

    train_ml_losses = []
    train_rl_losses = []
    total_batch = -1
    early_stop_flag = False
    if opt.train_rl:
        reward_cache = RewardCache(2000)

    if False:  # opt.train_from:
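        # Resuming from a saved training state is disabled here (guarded by `if False`).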
        state_path = opt.train_from.replace('.model', '.state')
        logging.info('Loading training state from: %s' % state_path)
        if os.path.exists(state_path):
            (epoch, total_batch, best_loss, stop_increasing, checkpoint_names, train_ml_history_losses, train_rl_history_losses, valid_history_losses,
             test_history_losses) = torch.load(open(state_path, 'rb'))
            opt.start_epoch = epoch

    for epoch in range(opt.start_epoch, opt.epochs):
        if early_stop_flag:
            break

        progbar = Progbar(logger=logging, title='Training', target=len(train_data_loader), batch_size=train_data_loader.batch_size,
                          total_examples=len(train_data_loader.dataset.examples))

        for batch_i, batch in enumerate(train_data_loader):
            model.train()
            total_batch += 1
            one2many_batch, one2one_batch = batch
            report_loss = []

            # Training
            if opt.train_ml:
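                # Maximum-likelihood update on the one2one batch.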
                loss_ml, decoder_log_probs = train_ml(one2one_batch, model, optimizer_ml, criterion, opt)
                train_ml_losses.append(loss_ml)
                report_loss.append(('train_ml_loss', loss_ml))
                report_loss.append(('PPL', loss_ml))

                # Brief report
                if batch_i % opt.report_every == 0:
                    brief_report(epoch, batch_i, one2one_batch, loss_ml, decoder_log_probs, opt)

            # Do not apply RL in the 0th epoch; the model needs to be reasonable before that.
            if opt.train_rl:
                if epoch >= opt.rl_start_epoch:
                    loss_rl = train_rl(one2many_batch, model, optimizer_rl, generator, opt, reward_cache)
                else:
                    loss_rl = 0.0
                train_rl_losses.append(loss_rl)
                report_loss.append(('train_rl_loss', loss_rl))

            progbar.update(epoch, batch_i, report_loss)

            # Validate and save checkpoint
            if (opt.run_valid_every == -1 and batch_i == len(train_data_loader) - 1) or\
               (opt.run_valid_every > -1 and total_batch > 1 and total_batch % opt.run_valid_every == 0):
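                # Validate either once at the end of each epoch (run_valid_every == -1)
                # or every run_valid_every batches.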
                logging.info('*' * 50)
                logging.info('Run validation and testing @Epoch=%d,#(Total batch)=%d' % (epoch, total_batch))
                # valid_losses    = _valid_error(valid_data_loader, model, criterion, epoch, opt)
                # valid_history_losses.append(valid_losses)
                valid_score_dict = evaluate_beam_search(generator, valid_data_loader, opt, title='Validating, epoch=%d, batch=%d, total_batch=%d' % (epoch, batch_i, total_batch), epoch=epoch, save_path=opt.pred_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))
                test_score_dict = evaluate_beam_search(generator, test_data_loader, opt, title='Testing, epoch=%d, batch=%d, total_batch=%d' % (epoch, batch_i, total_batch), epoch=epoch, save_path=opt.pred_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))

                checkpoint_names.append('epoch=%d-batch=%d-total_batch=%d' % (epoch, batch_i, total_batch))

                curve_names = []
                scores = []
                if opt.train_ml:
                    train_ml_history_losses.append(copy.copy(train_ml_losses))
                    scores += [train_ml_history_losses]
                    curve_names += ['Training ML Error']
                    train_ml_losses = []

                if opt.train_rl:
                    train_rl_history_losses.append(copy.copy(train_rl_losses))
                    scores += [train_rl_history_losses]
                    curve_names += ['Training RL Reward']
                    train_rl_losses = []

                valid_history_losses.append(valid_score_dict)
                test_history_losses.append(test_score_dict)

                scores += [[result_dict[name] for result_dict in valid_history_losses] for name in opt.report_score_names]
                curve_names += ['Valid-' + name for name in opt.report_score_names]
                scores += [[result_dict[name] for result_dict in test_history_losses] for name in opt.report_score_names]
                curve_names += ['Test-' + name for name in opt.report_score_names]

                scores = [np.asarray(s) for s in scores]
                # Plot the learning curve
                plot_learning_curve(scores=scores,
                                    curve_names=curve_names,
                                    checkpoint_names=checkpoint_names,
                                    title='Training Validation & Test',
                                    save_path=opt.exp_path + '/[epoch=%d,batch=%d,total_batch=%d]train_valid_test_curve.png' % (epoch, batch_i, total_batch))

                '''
                decide whether to early-stop training (stop when the f-score no longer increases; earlier versions stopped when the validation error no longer decreased)
                '''
                valid_loss = np.average(valid_history_losses[-1][opt.report_score_names[0]])
                is_best_loss = valid_loss > best_loss
                rate_of_change = float(valid_loss - best_loss) / float(best_loss) if float(best_loss) > 0 else 0.0

                # valid error doesn't increase
                if rate_of_change <= 0:
                    stop_increasing += 1
                else:
                    stop_increasing = 0

                if is_best_loss:
                    logging.info('Validation: update best loss (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        best_loss, valid_loss, rate_of_change * 100))
                else:
                    logging.info('Validation: best loss is not updated for %d times (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        stop_increasing, best_loss, valid_loss, rate_of_change * 100))

                best_loss = max(valid_loss, best_loss)

                # only store the checkpoints that make better validation performances
                if total_batch > 1 and (total_batch % opt.save_model_every == 0 or is_best_loss):  # epoch >= opt.start_checkpoint_at and
                    # Save the checkpoint
                    logging.info('Saving checkpoint to: %s' % os.path.join(opt.model_path, '%s.epoch=%d.batch=%d.total_batch=%d.error=%f' % (opt.exp, epoch, batch_i, total_batch, valid_loss) + '.model'))
                    torch.save(
                        model.state_dict(),
                        open(os.path.join(opt.model_path, '%s.epoch=%d.batch=%d.total_batch=%d' % (opt.exp, epoch, batch_i, total_batch) + '.model'), 'wb')
                    )
                    torch.save(
                        (epoch, total_batch, best_loss, stop_increasing, checkpoint_names, train_ml_history_losses, train_rl_history_losses, valid_history_losses, test_history_losses),
                        open(os.path.join(opt.model_path, '%s.epoch=%d.batch=%d.total_batch=%d' % (opt.exp, epoch, batch_i, total_batch) + '.state'), 'wb')
                    )

                if stop_increasing >= opt.early_stop_tolerance:
                    logging.info('No improvement for %d validation rounds; stopping training early' % stop_increasing)
                    early_stop_flag = True
                    break
                logging.info('*' * 50)
Example #10
def train_model(model, optimizer, criterion, train_data_loader,
                valid_data_loader, test_data_loader, opt):
    generator = SequenceGenerator(model,
                                  eos_id=opt.word2id[pykp.io.EOS_WORD],
                                  beam_size=opt.beam_size,
                                  max_sequence_length=opt.max_sent_length)

    logging.info(
        '======================  Checking GPU Availability  ========================='
    )
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on GPU! devices=%s' % str(opt.gpuid))
        # model = nn.DataParallel(model, device_ids=opt.gpuid)
    else:
        logging.info('Running on CPU!')

    logging.info(
        '======================  Start Training  =========================')

    checkpoint_names = []
    train_history_losses = []
    valid_history_losses = []
    test_history_losses = []
    # best_loss = sys.float_info.max # for normal training/testing loss (likelihood)
    best_loss = 0.0  # for f-score
    stop_increasing = 0

    train_losses = []
    total_batch = 0
    early_stop_flag = False

    if opt.train_from:
        state_path = opt.train_from.replace('.model', '.state')
        logging.info('Loading training state from: %s' % state_path)
        if os.path.exists(state_path):
            (epoch, total_batch, best_loss, stop_increasing, checkpoint_names,
             train_history_losses, valid_history_losses,
             test_history_losses) = torch.load(open(state_path, 'rb'))
            opt.start_epoch = epoch

    for epoch in range(opt.start_epoch, opt.epochs):
        if early_stop_flag:
            break

        progbar = Progbar(title='Training',
                          target=len(train_data_loader),
                          batch_size=train_data_loader.batch_size,
                          total_examples=len(train_data_loader.dataset))

        for batch_i, batch in enumerate(train_data_loader):
            model.train()
            batch_i += 1  # for the aesthetics of printing
            total_batch += 1
            one2many_batch, one2one_batch = batch
            src, trg, trg_target, trg_copy_target, src_ext, oov_lists = one2one_batch
            max_oov_number = max([len(oov) for oov in oov_lists])

            print("src size - ", src.size())
            print("target size - ", trg.size())

            if torch.cuda.is_available():
                src = src.cuda()
                trg = trg.cuda()
                trg_target = trg_target.cuda()
                trg_copy_target = trg_copy_target.cuda()
                src_ext = src_ext.cuda()

            optimizer.zero_grad()
            '''
            Training with Maximum Likelihood (word-level error)
            '''
            decoder_log_probs, _, _ = model.forward(src, trg, src_ext,
                                                    oov_lists)

            # simply average losses of all the predictions
            # IMPORTANT, must use logits instead of probs to compute the loss, otherwise it's super super slow at the beginning (grads of probs are small)!
            start_time = time.time()

            if not opt.copy_model:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(-1, opt.vocab_size),
                    trg_target.contiguous().view(-1))
            else:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(
                        -1, opt.vocab_size + max_oov_number),
                    trg_copy_target.contiguous().view(-1))
            '''
            Training with Reinforcement Learning (instance-level reward f-score)
            '''
            src_list, trg_list, _, trg_copy_target_list, src_oov_map_list, oov_list, src_str_list, trg_str_list = one2many_batch

            if torch.cuda.is_available():
                src_list = src_list.cuda()
                src_oov_map_list = src_oov_map_list.cuda()
            rl_loss = get_loss_rl()

            start_time = time.time()
            ml_loss.backward()
            print("--backward- %s seconds ---" % (time.time() - start_time))

            if opt.max_grad_norm > 0:
                pre_norm = torch.nn.utils.clip_grad_norm(
                    model.parameters(), opt.max_grad_norm)
                after_norm = (sum([
                    p.grad.data.norm(2)**2 for p in model.parameters()
                    if p.grad is not None
                ]))**(1.0 / 2)
                logging.info('clip grad (%f -> %f)' % (pre_norm, after_norm))

            optimizer.step()

            train_losses.append(ml_loss.data[0])

            progbar.update(epoch, batch_i, [('train_loss', ml_loss.data[0]),
                                            ('PPL', ml_loss.data[0])])

            if batch_i > 1 and batch_i % opt.report_every == 0:
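                # Every report_every batches, print a few sampled predictions obtained
                # by taking the argmax of the decoder log-probabilities (greedy search).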
                logging.info(
                    '======================  %d  =========================' %
                    (batch_i))

                logging.info('Epoch : %d Minibatch : %d, Loss=%.5f' %
                             (epoch, batch_i, np.mean(ml_loss.data[0])))
                sampled_size = 2
                logging.info(
                    'Printing predictions on %d sampled examples by greedy search'
                    % sampled_size)

                if torch.cuda.is_available():
                    src = src.data.cpu().numpy()
                    decoder_log_probs = decoder_log_probs.data.cpu().numpy()
                    max_words_pred = decoder_log_probs.argmax(axis=-1)
                    trg_target = trg_target.data.cpu().numpy()
                    trg_copy_target = trg_copy_target.data.cpu().numpy()
                else:
                    src = src.data.numpy()
                    decoder_log_probs = decoder_log_probs.data.numpy()
                    max_words_pred = decoder_log_probs.argmax(axis=-1)
                    trg_target = trg_target.data.numpy()
                    trg_copy_target = trg_copy_target.data.numpy()

                sampled_trg_idx = np.random.random_integers(low=0,
                                                            high=len(trg) - 1,
                                                            size=sampled_size)
                src = src[sampled_trg_idx]
                oov_lists = [oov_lists[i] for i in sampled_trg_idx]
                max_words_pred = [max_words_pred[i] for i in sampled_trg_idx]
                decoder_log_probs = decoder_log_probs[sampled_trg_idx]
                if not opt.copy_model:
                    trg_target = [
                        trg_target[i] for i in sampled_trg_idx
                    ]  # use the real target trg_loss (the starting <BOS> has been removed and contains oov ground-truth)
                else:
                    trg_target = [trg_copy_target[i] for i in sampled_trg_idx]

                for i, (src_wi, pred_wi, trg_i, oov_i) in enumerate(
                        zip(src, max_words_pred, trg_target, oov_lists)):
                    nll_prob = -np.sum([
                        decoder_log_probs[i][l][pred_wi[l]]
                        for l in range(len(trg_i))
                    ])
                    find_copy = np.any([x >= opt.vocab_size for x in src_wi])
                    has_copy = np.any([x >= opt.vocab_size for x in trg_i])

                    sentence_source = [
                        opt.id2word[x]
                        if x < opt.vocab_size else oov_i[x - opt.vocab_size]
                        for x in src_wi
                    ]
                    sentence_pred = [
                        opt.id2word[x]
                        if x < opt.vocab_size else oov_i[x - opt.vocab_size]
                        for x in pred_wi
                    ]
                    sentence_real = [
                        opt.id2word[x]
                        if x < opt.vocab_size else oov_i[x - opt.vocab_size]
                        for x in trg_i
                    ]

                    sentence_source = sentence_source[:sentence_source.index(
                        '<pad>'
                    )] if '<pad>' in sentence_source else sentence_source
                    sentence_pred = sentence_pred[:sentence_pred.index(
                        '<pad>'
                    )] if '<pad>' in sentence_pred else sentence_pred
                    sentence_real = sentence_real[:sentence_real.index(
                        '<pad>'
                    )] if '<pad>' in sentence_real else sentence_real

                    logging.info(
                        '==================================================')
                    logging.info('Source: %s ' % (' '.join(sentence_source)))
                    logging.info('\t\tPred : %s (%.4f)' %
                                 (' '.join(sentence_pred), nll_prob) +
                                 (' [FIND COPY]' if find_copy else ''))
                    logging.info('\t\tReal : %s ' % (' '.join(sentence_real)) +
                                 (' [HAS COPY]' +
                                  str(trg_i) if has_copy else ''))

            if total_batch > 1 and total_batch % opt.run_valid_every == 0:
                logging.info('*' * 50)
                logging.info(
                    'Run validation and testing @Epoch=%d,#(Total batch)=%d' %
                    (epoch, total_batch))
                # valid_losses    = _valid_error(valid_data_loader, model, criterion, epoch, opt)
                # valid_history_losses.append(valid_losses)
                valid_score_dict = evaluate_beam_search(
                    generator,
                    valid_data_loader,
                    opt,
                    title='valid',
                    epoch=epoch,
                    save_path=opt.exp_path + '/epoch%d_batch%d_total_batch%d' %
                    (epoch, batch_i, total_batch))
                test_score_dict = evaluate_beam_search(
                    generator,
                    test_data_loader,
                    opt,
                    title='test',
                    epoch=epoch,
                    save_path=opt.exp_path + '/epoch%d_batch%d_total_batch%d' %
                    (epoch, batch_i, total_batch))

                checkpoint_names.append('epoch=%d-batch=%d-total_batch=%d' %
                                        (epoch, batch_i, total_batch))
                train_history_losses.append(copy.copy(train_losses))
                valid_history_losses.append(valid_score_dict)
                test_history_losses.append(test_score_dict)
                train_losses = []

                scores = [train_history_losses]
                curve_names = ['Training Error']
                scores += [[
                    result_dict[name] for result_dict in valid_history_losses
                ] for name in opt.report_score_names]
                curve_names += [
                    'Valid-' + name for name in opt.report_score_names
                ]
                scores += [[
                    result_dict[name] for result_dict in test_history_losses
                ] for name in opt.report_score_names]
                curve_names += [
                    'Test-' + name for name in opt.report_score_names
                ]

                scores = [np.asarray(s) for s in scores]
                # Plot the learning curve
                plot_learning_curve(
                    scores=scores,
                    curve_names=curve_names,
                    checkpoint_names=checkpoint_names,
                    title='Training Validation & Test',
                    save_path=opt.exp_path +
                    '/[epoch=%d,batch=%d,total_batch=%d]train_valid_test_curve.png'
                    % (epoch, batch_i, total_batch))
                '''
                decide whether to early-stop training (stop when the f-score no longer increases; earlier versions stopped when the validation error no longer decreased)
                '''
                valid_loss = np.average(
                    valid_history_losses[-1][opt.report_score_names[0]])
                is_best_loss = valid_loss > best_loss
                rate_of_change = float(valid_loss - best_loss) / float(
                    best_loss) if float(best_loss) > 0 else 0.0

                # valid error doesn't increase
                if rate_of_change <= 0:
                    stop_increasing += 1
                else:
                    stop_increasing = 0

                if is_best_loss:
                    logging.info(
                        'Validation: update best loss (%.4f --> %.4f), rate of change (ROC)=%.2f'
                        % (best_loss, valid_loss, rate_of_change * 100))
                else:
                    logging.info(
                        'Validation: best loss is not updated for %d times (%.4f --> %.4f), rate of change (ROC)=%.2f'
                        % (stop_increasing, best_loss, valid_loss,
                           rate_of_change * 100))

                best_loss = max(valid_loss, best_loss)

                # only store the checkpoints that make better validation performances
                if total_batch > 1 and (
                        total_batch % opt.save_model_every == 0 or
                        is_best_loss):  #epoch >= opt.start_checkpoint_at and
                    # Save the checkpoint
                    logging.info('Saving checkpoint to: %s' % os.path.join(
                        opt.save_path,
                        '%s.epoch=%d.batch=%d.total_batch=%d.error=%f' %
                        (opt.exp, epoch, batch_i, total_batch, valid_loss) +
                        '.model'))
                    torch.save(
                        model.state_dict(),
                        open(
                            os.path.join(
                                opt.save_path,
                                '%s.epoch=%d.batch=%d.total_batch=%d' %
                                (opt.exp, epoch, batch_i, total_batch) +
                                '.model'), 'wb'))
                    torch.save((epoch, total_batch, best_loss, stop_increasing,
                                checkpoint_names, train_history_losses,
                                valid_history_losses, test_history_losses),
                               open(
                                   os.path.join(
                                       opt.save_path,
                                       '%s.epoch=%d.batch=%d.total_batch=%d' %
                                       (opt.exp, epoch, batch_i, total_batch) +
                                       '.state'), 'wb'))

                if stop_increasing >= opt.early_stop_tolerance:
                    logging.info(
                        'No improvement for %d validation rounds; stopping training early'
                        % stop_increasing)
                    early_stop_flag = True
                    break
                logging.info('*' * 50)
Example #11
def main():
    # load settings for training
    parser = argparse.ArgumentParser(
        description='predict.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    print(opt.gpuid)
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    opt.exp = 'predict.' + opt.exp
    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'

    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
    else:
        opt.exp += '.uni-directional'

    # fill time into the name
    if opt.exp_path.find('%s') > 0:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.pred_path = opt.pred_path % (opt.exp, opt.timemark)

    if not os.path.exists(opt.exp_path):
        os.makedirs(opt.exp_path)
    if not os.path.exists(opt.pred_path):
        os.makedirs(opt.pred_path)

    logging = config.init_logging('train', opt.exp_path + '/output.log')

    logging.info('Parameters:')
    [
        logging.info('%s    :    %s' % (k, str(v)))
        for k, v in opt.__dict__.items()
    ]

    try:
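        # Load the data and the model, then run a single beam-search pass over the test set.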
        train_data_loader, valid_data_loader, test_data_loader, word2id, id2word, vocab = load_data_vocab(
            opt, load_train=False)
        model = init_model(opt)
        # optimizer, criterion = init_optimizer_criterion(model, opt)

        generator = SequenceGenerator(model,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length)

        # import time
        # start_time = time.time()
        evaluate_beam_search(
            generator,
            test_data_loader,
            opt,
            title='predict',
            save_path=opt.pred_path +
            '/[epoch=%d,batch=%d,total_batch=%d]test_result.csv' % (0, 0, 0))
        # print("--- %s seconds --- Complete Beam Search" % (time.time() - start_time))

        # predict_greedy(model, test_data_loader, test_examples, opt)

    except Exception as e:
        logging.exception("message")
Example #12
def main():
    opt = config.init_opt(description='predict.py')

    opt.data = 'data3/kp20k/kp20k'
    opt.vocab = 'data3/kp20k/kp20k.vocab.pt'
    #opt.train_from = 'exp/kp20k.ml.copy.20181129-193506/model/kp20k.ml.copy.epoch=1.batch=20000.total_batch=20000.model'
    opt.train_from = 'exp/kp20k.ml.copy.20181128-153121/model/kp20k.ml.copy.epoch=2.batch=15495.total_batch=38000.model'

    opt.useGpu = 0
    opt.encoder_type = 'rnn'

    opt.useCLF = False

    if opt.encoder_type.startswith('transformer'):
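        # Transformer-specific hyperparameters; the elif/else branches below cover
        # the BERT and default RNN encoders.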
        opt.batch_size = 32
        opt.d_inner = 2048
        opt.enc_n_layers = 4
        opt.dec_n_layers = 2
        opt.n_head = 8
        opt.d_k = 64
        opt.d_v = 64
        opt.d_model = 512
        opt.word_vec_size = 512
        opt.run_valid_every = 5000000
        opt.save_model_every = 20000
        opt.decode_old = True
        # opt.copy_attention = False
    elif opt.encoder_type.startswith('bert'):
        opt.useOnlyTwo = False
        opt.avgHidden = True
        opt.useZeroDecodeHidden = False
        opt.useSameEmbeding = False
        opt.batch_size = 10
        opt.max_sent_length = 10
        opt.run_valid_every = 20000
        opt.decode_old = False
        opt.beam_search_batch_size = 10
        opt.bert_model = 'bert-base-uncased'
        opt.tokenizer = BertTokenizer.from_pretrained(opt.bert_model)
        if opt.encoder_type == 'bert_low':
            opt.copy_attention = False
    else:
        opt.enc_layers = 2
        opt.bidirectional = True
        opt.decode_old = True

    logger = config.init_logging('predict',
                                 opt.exp_path + '/output.log',
                                 redirect_to_stdout=False)

    logger.info('EXP_PATH : ' + opt.exp_path)

    logger.info('Parameters:')
    [
        logger.info('%s    :    %s' % (k, str(v)))
        for k, v in opt.__dict__.items()
    ]

    logger.info(
        '======================  Checking GPU Availability  ========================='
    )
    if torch.cuda.is_available() and opt.useGpu:
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logger.info('Running on %s! devices=%s' %
                    ('MULTIPLE GPUs' if len(opt.gpuid) > 1 else '1 GPU',
                     str(opt.gpuid)))
    else:
        logger.info('Running on CPU!')

    try:
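        # Rebuild the model from the hard-coded checkpoint and decode every configured test set.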
        test_data_loaders, word2id, id2word, vocab = load_vocab_and_testsets(
            opt)
        model = init_model(opt)
        if torch.cuda.is_available() and opt.useGpu:
            model.cuda()

        generator = SequenceGenerator(model,
                                      opt.word_vec_size if opt.encoder_type
                                      == 'transformer' else opt.vocab_size,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length,
                                      useGpu=opt.useGpu)

        for testset_name, test_data_loader in zip(opt.test_dataset_names,
                                                  test_data_loaders):
            logger.info('Evaluating %s' % testset_name)
            evaluate_beam_search(generator,
                                 test_data_loader,
                                 opt,
                                 title='test_%s' % testset_name,
                                 predict_save_path=opt.pred_path +
                                 '/%s_test_result/' % (testset_name))

    except Exception as e:
        logger.error(e, exc_info=True)