def __init__(
    self,
    modelfs,
    fvocab_i,
    fvocab_t,
    cnfg,
    minbsize=1,
    expand_for_mulgpu=True,
    bsize=64,
    maxpad=16,
    maxpart=4,
    maxtoken=1536,
    minfreq=False,
    vsize=False,
):
    """Load vocabularies and model weights, and store decoding settings.

    Args:
        modelfs: A single checkpoint reference, or a list/tuple of them.
            A list/tuple yields an ``Ensemble`` of one ``NMT`` per entry.
        fvocab_i: Source vocabulary reference, passed to ``ldvocab``.
        fvocab_t: Target vocabulary reference, passed to ``ldvocab``.
        cnfg: Configuration object; the model hyper-parameters, CUDA
            options, ``use_amp``, ``beam_size`` and ``length_penalty``
            are read from its attributes.
        minbsize: Multiplier applied to ``bsize`` and ``maxtoken`` when
            ``expand_for_mulgpu`` is true; always stored on the instance.
        expand_for_mulgpu: Whether to scale the batch limits by
            ``minbsize``.
        bsize: Base batch size.
        maxpad: Stored unchanged as ``self.maxpad``.
        maxpart: Stored unchanged as ``self.maxpart``.
        maxtoken: Base token limit.
        minfreq: Forwarded to ``ldvocab`` for both vocabularies.
        vsize: Forwarded to ``ldvocab`` for both vocabularies.
    """
    # NOTE(review): ldvocab presumably returns (token -> index mapping,
    # vocabulary size) -- confirm against its definition.
    src_vocab, n_src_words = ldvocab(fvocab_i, minfreq, vsize)
    tgt_vocab, n_tgt_words = ldvocab(fvocab_t, minfreq, vsize)
    self.vcbi = src_vocab
    # The target vocabulary is kept in reversed form.
    self.vcbt = reverse_dict(tgt_vocab)

    self.bsize = bsize * minbsize if expand_for_mulgpu else bsize
    self.maxtoken = maxtoken * minbsize if expand_for_mulgpu else maxtoken
    self.maxpad = maxpad
    self.maxpart = maxpart
    self.minbsize = minbsize

    def _restore(checkpoint):
        # Build one NMT from cnfg, load its weights, then apply load_fixing.
        net = NMT(
            cnfg.isize,
            n_src_words,
            n_tgt_words,
            cnfg.nlayer,
            cnfg.ff_hsize,
            cnfg.drop,
            cnfg.attn_drop,
            cnfg.share_emb,
            cnfg.nhead,
            cache_len_default,
            cnfg.attn_hsize,
            cnfg.norm_output,
            cnfg.bindDecoderEmb,
            cnfg.forbidden_indexes,
        )
        net = load_model_cpu(checkpoint, net)
        net.apply(load_fixing)
        return net

    if isinstance(modelfs, (list, tuple)):
        model = Ensemble([_restore(checkpoint) for checkpoint in modelfs])
    else:
        model = _restore(modelfs)

    model.eval()

    use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda_decode(
        cnfg.use_cuda, cnfg.gpuid, cnfg.multi_gpu_decoding
    )
    self.use_cuda = use_cuda
    self.cuda_device = cuda_device
    self.multi_gpu = multi_gpu

    if use_cuda:
        model.to(cuda_device)
        if multi_gpu:
            model = DataParallelMT(
                model,
                device_ids=cuda_devices,
                output_device=cuda_device.index,
                host_replicate=True,
                gather_output=False,
            )

    # `and` is kept so the stored value is exactly what the expression yields.
    self.use_amp = cnfg.use_amp and use_cuda
    self.beam_size = cnfg.beam_size
    self.length_penalty = cnfg.length_penalty
    self.net = model
else: cuda_device = torch.device(gpuid) multi_gpu = False cuda_devices = None torch.cuda.set_device(cuda_device.index) else: cuda_device = False multi_gpu = False cuda_devices = None if cuda_device: mymodel.to(cuda_device) if multi_gpu: mymodel = DataParallelMT(mymodel, device_ids=cuda_devices, output_device=cuda_device.index, host_replicate=True, gather_output=False) beam_size = cnfg.beam_size length_penalty = cnfg.length_penalty src_grp = td["src"] with torch.no_grad(): for i in tqdm(range(ntest), mininterval=tqdm_mininterval): seq_batch = torch.from_numpy(src_grp[str(i)][:]) if cuda_device: seq_batch = seq_batch.to(cuda_device) seq_batch = seq_batch.long() output = mymodel.decode(seq_batch, beam_size, None, length_penalty)