Example #1
if cnfg.src_emb is not None:
    logger.info("Load source embedding from: " + cnfg.src_emb)
    load_emb(cnfg.src_emb, mymodel.enc.wemb.weight, nwordi,
             cnfg.scale_down_emb, cnfg.freeze_srcemb)
if cnfg.tgt_emb is not None:
    logger.info("Load target embedding from: " + cnfg.tgt_emb)
    load_emb(cnfg.tgt_emb, mymodel.dec.wemb.weight, nwordt,
             cnfg.scale_down_emb, cnfg.freeze_tgtemb)

if cuda_device:
    mymodel.to(cuda_device)
    lossf.to(cuda_device)

use_amp = cnfg.use_amp and use_cuda
scaler = (MultiGPUGradScaler()
          if multi_gpu_optimizer else GradScaler()) if use_amp else None

if multi_gpu:
    mymodel = DataParallelMT(mymodel,
                             device_ids=cuda_devices,
                             output_device=cuda_device.index,
                             host_replicate=True,
                             gather_output=False)
    lossf = DataParallelCriterion(lossf,
                                  device_ids=cuda_devices,
                                  output_device=cuda_device.index,
                                  replicate_once=True)

if multi_gpu:
    optimizer = mymodel.build_optimizer(
        Optimizer, lr=init_lr, betas=adam_betas_default,
        eps=ieps_adam_default, weight_decay=cnfg.weight_decay,
        amsgrad=use_ams, multi_gpu_optimizer=multi_gpu_optimizer,
        contiguous_parameters=contiguous_parameters)
Example #2
if cnfg.src_emb is not None:
	logger.info("Load source embedding from: " + cnfg.src_emb)
	load_emb(cnfg.src_emb, mymodel.enc.wemb.weight, nwordi, cnfg.scale_down_emb, cnfg.freeze_srcemb)
if cnfg.tgt_emb is not None:
	logger.info("Load target embedding from: " + cnfg.tgt_emb)
	load_emb(cnfg.tgt_emb, mymodel.dec.wemb.weight, nwordt, cnfg.scale_down_emb, cnfg.freeze_tgtemb)
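
load_emb here is a project helper, not a stock PyTorch API. The sketch below shows what such a loader typically does, assuming the embedding file was saved with torch.save, the dimensions match, and guessed semantics for the scale-down and freeze flags; all names are illustrative, not the project's implementation.

import torch

def load_emb_sketch(emb_file, weight, nword, scale_down, freeze):
	# Pretrained matrix of shape (vocab, emb_dim), saved with torch.save.
	pre = torch.load(emb_file, map_location="cpu")
	with torch.no_grad():
		# Copy at most nword rows into the model's embedding weight.
		nrow = min(nword, pre.size(0))
		weight[:nrow].copy_(pre[:nrow])
		if scale_down:
			# Assumed rule: shrink toward the usual 1/sqrt(d) init scale.
			weight[:nrow].div_(weight.size(-1) ** 0.5)
	if freeze:
		# Keep the loaded table fixed during training.
		weight.requires_grad_(False)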

if cuda_device:
	mymodel.to(cuda_device)
	lossf.to(cuda_device)

use_amp = cnfg.use_amp and use_cuda
scaler = (MultiGPUGradScaler() if multi_gpu_optimizer else GradScaler()) if use_amp else None
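
GradScaler is PyTorch's torch.cuda.amp.GradScaler (MultiGPUGradScaler is a project-specific variant for the multi-GPU optimizer). A standard mixed-precision update with a scaler looks like this sketch; the model call and loss signature are placeholders:

from torch.cuda.amp import GradScaler, autocast

def amp_step(model, lossf, optimizer, scaler, src, tgt):
	# Run the forward pass and loss under autocast so eligible ops use fp16.
	with autocast():
		out = model(src, tgt)
		loss = lossf(out, tgt)
	# Scale the loss so fp16 gradients do not underflow, then backprop.
	scaler.scale(loss).backward()
	# step() unscales first and skips the update if gradients overflowed.
	scaler.step(optimizer)
	scaler.update()
	optimizer.zero_grad(set_to_none=True)
	return loss.item()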

if multi_gpu:
	mymodel = DataParallelMT(mymodel, device_ids=cuda_devices, output_device=cuda_device.index, host_replicate=True, gather_output=False)
	lossf = DataParallelCriterion(lossf, device_ids=cuda_devices, output_device=cuda_device.index, replicate_once=True)
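
DataParallelMT and DataParallelCriterion are project wrappers, presumably extending stock data parallelism with one-time replication (host_replicate/replicate_once) and, via gather_output=False, keeping per-device outputs in place so the criterion can also run per device. The closest stock PyTorch equivalent, for comparison:

import torch.nn as nn

# Stock data parallelism: replicates the module on every forward and
# gathers all outputs back to output_device, which is exactly the
# overhead the wrappers above appear designed to avoid.
model = nn.DataParallel(mymodel, device_ids=cuda_devices, output_device=cuda_device.index)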

if multi_gpu:
	optimizer = mymodel.build_optimizer(Optimizer, lr=init_lr, betas=adam_betas_default, eps=ieps_adam_default, weight_decay=cnfg.weight_decay, amsgrad=use_ams, multi_gpu_optimizer=multi_gpu_optimizer, contiguous_parameters=contiguous_parameters)
else:
	optimizer = Optimizer(get_model_parameters(mymodel, contiguous_parameters=contiguous_parameters), lr=init_lr, betas=adam_betas_default, eps=ieps_adam_default, weight_decay=cnfg.weight_decay, amsgrad=use_ams)
optimizer.zero_grad(set_to_none=optm_step_zero_grad_set_none)
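
get_model_parameters is likewise project-specific; judging from the contiguous_parameters flag, it can presumably pack trainable parameters into contiguous buffers for faster optimizer updates. A minimal stand-in covering only the plain case:

def get_model_parameters_sketch(model, contiguous_parameters=False):
	if contiguous_parameters:
		# The real helper presumably flattens parameters into contiguous
		# buckets here; that machinery is omitted from this sketch.
		raise NotImplementedError
	# Only parameters still requiring gradients (e.g. not frozen
	# embeddings) should reach the optimizer.
	return [p for p in model.parameters() if p.requires_grad]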

lrsch = LRScheduler(optimizer, cnfg.isize, cnfg.warm_step, scale=cnfg.lr_scale)
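
Judging from its arguments (model size, warm-up steps, a scale), LRScheduler presumably implements the inverse-square-root warm-up schedule from the Transformer paper: lr = scale * isize**-0.5 * min(step**-0.5, step * warm_step**-1.5). A sketch of that schedule with stock PyTorch, assuming the optimizer's base lr is 1.0 so the lambda yields the effective rate:

from torch.optim.lr_scheduler import LambdaLR

def noam_schedule(optimizer, isize, warm_step, scale=1.0):
	def rate(step):
		step = max(step, 1)  # avoid 0 ** -0.5 on the first call
		return scale * isize ** -0.5 * min(step ** -0.5, step * warm_step ** -1.5)
	# LambdaLR multiplies each base lr by rate(step) after every step().
	return LambdaLR(optimizer, lr_lambda=rate)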

state_holder = None if statesf is None and cnt_states is None else Holder(**{"optm": optimizer, "lrsch": lrsch, "pyrand": PyRandomState(), "thrand": THRandomState(use_cuda=use_cuda)})
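
Holder, PyRandomState, and THRandomState are project containers that bundle optimizer, scheduler, and RNG state so an interrupted run can resume exactly where it left off. With standard APIs only, saving and restoring the same state looks roughly like this (lrsch is assumed to expose the usual state_dict interface):

import random
import torch

def save_train_state(path, optimizer, lrsch, use_cuda=False):
	state = {
		"optm": optimizer.state_dict(),
		"lrsch": lrsch.state_dict(),
		"pyrand": random.getstate(),
		"thrand": torch.get_rng_state(),
	}
	if use_cuda:
		# Per-device CUDA RNG states.
		state["curand"] = torch.cuda.get_rng_state_all()
	torch.save(state, path)

def load_train_state(path, optimizer, lrsch, use_cuda=False):
	state = torch.load(path, map_location="cpu")
	optimizer.load_state_dict(state["optm"])
	lrsch.load_state_dict(state["lrsch"])
	random.setstate(state["pyrand"])
	torch.set_rng_state(state["thrand"])
	if use_cuda and "curand" in state:
		torch.cuda.set_rng_state_all(state["curand"])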