def __init__(self, modelfs, fvocab_i, fvocab_t, cnfg, minbsize=1, expand_for_mulgpu=True, bsize=64, maxpad=16, maxpart=4, maxtoken=1536, minfreq=False, vsize=False):

    # load source and target vocabularies; the target vocabulary is
    # reversed (index -> token) so decoder outputs can be mapped back to text
    vcbi, nwordi = ldvocab(fvocab_i, minfreq, vsize)
    vcbt, nwordt = ldvocab(fvocab_t, minfreq, vsize)
    self.vcbi, self.vcbt = vcbi, reverse_dict(vcbt)

    # scale batch limits by the per-GPU batch factor when decoding on multiple devices
    if expand_for_mulgpu:
        self.bsize = bsize * minbsize
        self.maxtoken = maxtoken * minbsize
    else:
        self.bsize = bsize
        self.maxtoken = maxtoken
    self.maxpad = maxpad
    self.maxpart = maxpart
    self.minbsize = minbsize

    # a list/tuple of checkpoint files builds an ensemble of models;
    # a single path builds and loads one model
    if isinstance(modelfs, (list, tuple)):
        models = []
        for modelf in modelfs:
            tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
            tmp = load_model_cpu(modelf, tmp)
            tmp.apply(load_fixing)
            models.append(tmp)
        model = Ensemble(models)
    else:
        model = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
        model = load_model_cpu(modelfs, model)
        model.apply(load_fixing)

    model.eval()

    # resolve device placement for decoding; use the device returned by
    # parse_cuda_decode rather than a separately constructed torch.device,
    # so single- and multi-GPU paths agree on the output device
    self.use_cuda, self.cuda_device, cuda_devices, self.multi_gpu = parse_cuda_decode(cnfg.use_cuda, cnfg.gpuid, cnfg.multi_gpu_decoding)
    if self.use_cuda:
        model.to(self.cuda_device)
        if self.multi_gpu:
            model = DataParallelMT(model, device_ids=cuda_devices, output_device=self.cuda_device.index, host_replicate=True, gather_output=False)

    self.beam_size = cnfg.beam_size
    self.length_penalty = cnfg.length_penalty
    self.net = model
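
# A minimal sketch of what an ensemble wrapper like Ensemble(models) can do:
# average the per-step output distributions of several loaded models. This is
# an illustrative stand-in under assumed semantics, not the actual Ensemble
# class used above (the real one must also manage decoder caches and beam
# search); "AveragedEnsemble" and the log-probability output convention are
# assumptions for illustration.
import torch
from torch import nn

class AveragedEnsemble(nn.Module):

    def __init__(self, models):
        super(AveragedEnsemble, self).__init__()
        self.nets = nn.ModuleList(models)

    def forward(self, *args, **kwargs):
        # each sub-model is assumed to return log-probabilities; averaging in
        # probability space and mapping back to log space is the common choice
        probs = torch.stack([net(*args, **kwargs).exp() for net in self.nets], dim=0).mean(0)
        return probs.log()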
# open the training and validation sets (HDF5); the open call for td was
# missing from this excerpt, so it is restored here by analogy with vd
# (cnfg.train_data is assumed)
td = h5py.File(cnfg.train_data, "r")
vd = h5py.File(cnfg.dev_data, "r")

ntrain = td["ndata"][:].item()
nvalid = vd["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

fine_tune_m = cnfg.fine_tune_m

# keys of the training batches stored in the HDF5 file
tl = [str(i) for i in range(ntrain)]

# random initialization, optionally overwritten by a pre-trained checkpoint
mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
if fine_tune_m is not None:
    logger.info("Load pre-trained model from: " + fine_tune_m)
    mymodel = load_model_cpu(fine_tune_m, mymodel)

# plain NLL with the padding class masked, kept for reference:
#lw = torch.ones(nwordt).float()
#lw[0] = 0.0
#lossf = nn.NLLLoss(lw, ignore_index=0, reduction='sum')
lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='sum', forbidden_index=cnfg.forbidden_indexes)

# optionally initialize encoder/decoder embeddings from pre-trained vectors
if cnfg.src_emb is not None:
    logger.info("Load source embedding from: " + cnfg.src_emb)
    load_emb(cnfg.src_emb, mymodel.enc.wemb.weight, nwordi, cnfg.scale_down_emb, cnfg.freeze_srcemb)
if cnfg.tgt_emb is not None:
    logger.info("Load target embedding from: " + cnfg.tgt_emb)
    load_emb(cnfg.tgt_emb, mymodel.dec.wemb.weight, nwordt, cnfg.scale_down_emb, cnfg.freeze_tgtemb)
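
# A minimal sketch of the label smoothing loss configured above: it mixes the
# one-hot target with a uniform distribution over the remaining classes and
# masks padding positions. Illustrative only, not the toolkit's actual
# LabelSmoothingLoss (forbidden_index handling and the reduction modes are
# simplified); "SimpleLabelSmoothingLoss" is a hypothetical name.
import torch
from torch import nn

class SimpleLabelSmoothingLoss(nn.Module):

    def __init__(self, nclass, smoothing=0.1, ignore_index=0):
        super(SimpleLabelSmoothingLoss, self).__init__()
        self.conf = 1.0 - smoothing
        # probability mass spread over every non-target class
        self.smooth_value = smoothing / (nclass - 1)
        self.ignore_index = ignore_index

    def forward(self, logprobs, target):
        # logprobs: (batch, nclass) log-probabilities; target: (batch,) indices
        smoothed = logprobs.new_full(logprobs.size(), self.smooth_value)
        smoothed.scatter_(1, target.unsqueeze(1), self.conf)
        loss = -(smoothed * logprobs).sum(-1)
        # zero out positions that hold the padding index
        loss = loss.masked_fill(target.eq(self.ignore_index), 0.0)
        return loss.sum()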