Exemple #1
0
        # Build a fresh NMT instance from the configuration values and the
        # nwordi / nwordt sizes. NOTE(review): the enclosing loop/branch header
        # is outside the visible part of the file; this appears to run once per
        # checkpoint path `modelf` -- confirm.
        tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize,
                  cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
                  cache_len_default, cnfg.attn_hsize, cnfg.norm_output,
                  cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

        # Load the checkpoint `modelf` into the new instance via
        # load_model_cpu, then run load_fixing over the model with `apply`.
        tmp = load_model_cpu(modelf, tmp)
        tmp.apply(load_fixing)

        # Collect the loaded model for the ensemble.
        models.append(tmp)
    # Wrap every collected model in a single Ensemble, used as the model below.
    mymodel = Ensemble(models)

# Put the model into evaluation mode through its `eval()` method.
mymodel.eval()

# Build the label-smoothing loss with reduction='none'; pad_id is passed as
# the ignore_index and cnfg.forbidden_indexes as forbidden_index.
# NOTE(review): `lossf` is rebound further down in this file with
# reduction='sum' -- confirm which instance is meant to be used.
lossf = LabelSmoothingLoss(nwordt,
                           cnfg.label_smoothing,
                           ignore_index=pad_id,
                           reduction='none',
                           forbidden_index=cnfg.forbidden_indexes)

# Resolve the device settings from the configuration. Judging by the returned
# names: a CUDA on/off flag, the primary device, the device list and a
# multi-GPU flag.
use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda(
    cnfg.use_cuda, cnfg.gpuid)
# AMP is enabled only when it is requested in the config AND CUDA is in use.
use_amp = cnfg.use_amp and use_cuda

# Important to make cudnn methods deterministic
set_random_seed(cnfg.seed, use_cuda)

# On CUDA, move both the model and the loss to the primary device; with
# several GPUs the model is additionally wrapped in DataParallelMT.
# `ntask` is not defined in the visible part of the file.
if use_cuda:
    mymodel.to(cuda_device)
    lossf.to(cuda_device)
    if multi_gpu:
        mymodel = DataParallelMT(mymodel,
              cnfg.bindDecoderEmb,
              cnfg.forbidden_indexes,
              ntask=ntask)

# Checkpoint to fine-tune from, as given in the configuration; None skips the
# loading step below.
fine_tune_m = cnfg.fine_tune_m

# Run parameter initialisation and init_fixing first; when a fine-tuning
# checkpoint is configured it is loaded afterwards via load_model_cpu.
# NOTE(review): earlier in this file the model is built from loaded
# checkpoints and switched to eval mode. If init_model_params re-initialises
# weights, running it here would discard them -- confirm this section belongs
# to the same code path.
mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
if fine_tune_m is not None:
    logger.info("Load pre-trained model from: " + fine_tune_m)
    mymodel = load_model_cpu(fine_tune_m, mymodel)
    mymodel.apply(load_fixing)

# Rebind the loss with reduction='sum'. The new object has not been moved to
# the device yet, which is why the device move below is still required.
lossf = LabelSmoothingLoss(nwordt,
                           cnfg.label_smoothing,
                           ignore_index=pad_id,
                           reduction='sum',
                           forbidden_index=cnfg.forbidden_indexes)

# Optionally load source / target embeddings from the configured paths into
# the encoder / decoder embedding weights, passing the scale-down and freeze
# settings from the configuration through to load_emb.
# NOTE(review): `mymodel.enc` / `mymodel.dec` are accessed directly; if the
# model was wrapped (e.g. by DataParallelMT or Ensemble) these attributes may
# not exist on the wrapper -- confirm.
if cnfg.src_emb is not None:
    logger.info("Load source embedding from: " + cnfg.src_emb)
    load_emb(cnfg.src_emb, mymodel.enc.wemb.weight, nwordi,
             cnfg.scale_down_emb, cnfg.freeze_srcemb)
if cnfg.tgt_emb is not None:
    logger.info("Load target embedding from: " + cnfg.tgt_emb)
    load_emb(cnfg.tgt_emb, mymodel.dec.wemb.weight, nwordt,
             cnfg.scale_down_emb, cnfg.freeze_tgtemb)

# Gate on the explicit use_cuda flag rather than on the truthiness of
# cuda_device: a device expressed as ordinal 0 would be falsy and the move
# would be silently skipped. This also matches the earlier `if use_cuda:`
# device placement in this file.
if use_cuda:
    mymodel.to(cuda_device)
    lossf.to(cuda_device)