def ltm(opt, model_path=None):
    """Load a checkpoint and build the model it describes, set to eval mode.

    Args:
        opt: Run options. Read here: ``models`` (its first entry is the
            default checkpoint path), ``data_type``, ``gpu`` and ``fp32``.
            The object is also handed to ``use_gpu``.
        model_path: Checkpoint to load. ``None`` selects ``opt.models[0]``.

    Returns:
        A ``(fields, model, model_opt)`` tuple.
    """
    path = opt.models[0] if model_path is None else model_path
    # The map_location callable hands every storage back unchanged.
    ckpt = torch.load(path, map_location=lambda storage, loc: storage)

    model_opt = ArgumentParser.ckpt_model_opts(ckpt["opt"])
    ArgumentParser.update_model_opts(model_opt)
    ArgumentParser.validate_model_opts(model_opt)

    raw_vocab = ckpt["vocab"]
    if not inputters.old_style_vocab(raw_vocab):
        fields = raw_vocab
    else:
        fields = inputters.load_old_vocab(
            raw_vocab, opt.data_type, dynamic_dict=model_opt.copy_attn
        )

    # With RANDOM_WEIGHTS set, no checkpoint reaches the builder, so the
    # model is left randomly initialized.
    weights_source = None if settings.RANDOM_WEIGHTS else ckpt
    model = build_base_model(
        model_opt, fields, use_gpu(opt), weights_source, opt.gpu
    )

    if opt.fp32:
        model.float()
    model.eval()
    model.generator.eval()
    return fields, model, model_opt
def _select_generator_activation(gen_alpha, k, bisect_iter):
    """Return the output activation module for a given alpha.

    Args:
        gen_alpha: 1.0 selects softmax, 2.0 sparsemax, 1.5 tsallis-1.5;
            any other value needs the generic bisection implementation.
        k: When > 0, prefer the top-k variant (sparsemax / tsallis-1.5).
        bisect_iter: When > 0, number of bisection iterations to use.

    Raises:
        AssertionError: if ``gen_alpha`` is not 1, 1.5 or 2 and
            ``bisect_iter`` is 0.
    """
    sparse = onmt.modules.sparse_activations
    if gen_alpha == 1.0:
        return nn.Softmax(dim=-1)
    if gen_alpha == 2.0:
        if k > 0:
            return sparse.SparsemaxTopK(dim=-1, k=k)
        if bisect_iter > 0:
            return sparse.SparsemaxBisect(n_iter=bisect_iter)
        return sparse.Sparsemax(dim=-1)
    if gen_alpha == 1.5 and bisect_iter == 0:
        if k > 0:
            return sparse.Tsallis15TopK(dim=-1, k=k)
        return sparse.Tsallis15(dim=-1)
    # generic tsallis with bisection
    assert bisect_iter > 0, "Must use bisection with alpha != 1,1.5,2"
    return sparse.TsallisBisect(alpha=gen_alpha, n_iter=bisect_iter)


def load_model(checkpoint, fields, k=0, bisect_iter=0, gpu=False):
    """Build the checkpoint's model and attach its output activation.

    The activation placed after the generator weights is chosen from the
    checkpoint options (``generator_function`` / ``loss_alpha``) together
    with ``k`` and ``bisect_iter``.

    Args:
        checkpoint: Loaded checkpoint. ``checkpoint["opt"]`` holds the model
            options; the whole object is passed to ``build_base_model``.
        fields: Passed through to ``build_base_model``.
        k: When > 0, use the top-k variant of sparsemax / tsallis-1.5.
        bisect_iter: When > 0, use a bisection variant with this many
            iterations.
        gpu: Passed through to ``build_base_model``.

    Returns:
        The model in eval mode, with ``model.generator`` replaced by
        ``nn.Sequential(<generator weights>, <activation>)``.

    Raises:
        AssertionError: if both ``k`` and ``bisect_iter`` are non-zero, or
            if alpha is not 1, 1.5 or 2 and ``bisect_iter`` is 0.
    """
    # Validate this call's own arguments (not a global ``opt``), and do it
    # before any model is built so a bad combination fails fast.
    assert (k == 0 or bisect_iter == 0), \
        "Bisection and topk are mutually exclusive ! !"

    model_opt = checkpoint["opt"]
    use_esoftmax = model_opt.generator_function == "esoftmax"

    gen_alpha = None
    if not use_esoftmax:
        alpha_lookup = {"softmax": 1.0, "tsallis15": 1.5, "sparsemax": 2.0}
        # Back-fill options the checkpoint does not carry (presumably
        # checkpoints saved before these options existed).
        if not hasattr(model_opt, "loss_alpha"):
            model_opt.loss_alpha = alpha_lookup[model_opt.generator_function]
        gen_alpha = alpha_lookup.get(
            model_opt.generator_function, model_opt.loss_alpha)
        if not hasattr(model_opt, "global_attention_alpha"):
            model_opt.global_attention_alpha = alpha_lookup[
                model_opt.global_attention_function]
        if not hasattr(model_opt, "global_attention_bisect_iter"):
            model_opt.global_attention_bisect_iter = 0
    # NOTE(review): the esoftmax path does not back-fill the attention
    # options; confirm esoftmax checkpoints always carry them.

    model = build_base_model(model_opt, fields, gpu, checkpoint)

    if use_esoftmax:
        # esoftmax has no alpha, so the alpha-based selection is skipped.
        gen_func = onmt.modules.sparse_activations.ESoftmax(dim=-1)
    else:
        gen_func = _select_generator_activation(gen_alpha, k, bisect_iter)

    # Keep only the weights of the existing generator and append the
    # activation selected above.
    if isinstance(model.generator, nn.Sequential):
        gen_weights = model.generator[0]
    else:
        gen_weights = model.generator
    model.generator = nn.Sequential(gen_weights, gen_func)

    model.eval()
    model.generator.eval()
    return model
def load_model(self, model, use_gpu=False, gpu_device=-1, fp32=False):
    """Load the checkpoint at ``model`` and build its network in eval mode.

    Args:
        model: Checkpoint location handed to ``torch.load``.
        use_gpu: ``True``/``False``, forwarded to ``build_base_model``.
        gpu_device: Device index (int) or ``None``, forwarded to
            ``build_base_model``.
        fp32: When true, the built model is cast with ``.float()``.

    Returns:
        A ``(fields, model, model_opt)`` tuple; ``fields`` is the
        checkpoint's ``'vocab'`` entry as stored.
    """
    # The map_location callable hands every storage back unchanged.
    ckpt = torch.load(model, map_location=lambda storage, loc: storage)

    model_opt = ArgumentParser.ckpt_model_opts(ckpt['opt'])
    ArgumentParser.update_model_opts(model_opt)
    ArgumentParser.validate_model_opts(model_opt)

    fields = ckpt['vocab']
    net = build_base_model(model_opt, fields, use_gpu, ckpt, gpu_device)

    if fp32:
        net.float()
    net.eval()
    net.generator.eval()
    return fields, net, model_opt
def __init__(self, model_file, temp_dir, logger, silent=False):
    """Load a checkpoint on CPU and make sure ``<temp_dir>/l2e`` exists.

    Args:
        model_file: Checkpoint path handed to ``torch.load``.
        temp_dir: Working directory; an ``l2e`` sub-directory is created
            inside it when missing.
        logger: Receives two ``info`` messages unless ``silent`` is set.
        silent: Suppress the progress messages.

    Sets ``self.model_file``, ``self.temp_dir``, ``self.model``,
    ``self.fields`` and ``self.model_opt``.
    """
    self.model_file = model_file
    self.temp_dir = temp_dir

    verbose = not silent
    if verbose:
        logger.info("Start loading the model")

    ckpt = torch.load(self.model_file, map_location='cpu')
    fields = inputters.load_fields_from_vocab(ckpt['vocab'])

    model_opt = ckpt['opt']
    # OpenNMT changed their configuration: the single rnn_size is copied
    # into the separate encoder / decoder options.
    # (Forcing max_relative_positions = 0 and model_dtype = 'fp32' was
    # left disabled by the original author.)
    model_opt.enc_rnn_size = model_opt.dec_rnn_size = model_opt.rnn_size

    net = build_base_model(model_opt, fields, gpu=False, checkpoint=ckpt)
    net.eval()
    net.generator.eval()

    self.model = net
    self.fields = fields
    self.model_opt = model_opt

    l2e_dir = pjoin(self.temp_dir, "l2e")
    if not os.path.exists(l2e_dir):
        os.makedirs(l2e_dir)

    if verbose:
        logger.info("Model built")
model_opt = checkpoint['opt'] fields = checkpoint['vocab'] if opt.cuda: torch.cuda.set_device(0) cur_device = "cuda" gpu_id = 0 else: cur_device = "cpu" gpu_id = None print("current device: ", cur_device) from onmt import model_builder model = model_builder.build_base_model(model_opt, fields, cur_device == 'gpu', checkpoint, gpu_id) model.to(cur_device) model.eval() from apply_bpe import BPE import codecs codes = codecs.open( "/scratch/project_2001970/AleModel/bpe-model.de-en-35k.wmt19-news-para.norm.tok.tc", encoding='utf-8') bpe = BPE(codes) def prepare(params, samples): #_, params.word2id = create_dictionary(samples)
def __init__(self, model_dir):
    """Load the model under ``model_dir`` and build a translator for it.

    When the configuration enables online learning, an SGD optimizer and a
    trainer are built as well; otherwise ``self._trainer`` is ``None``.

    Args:
        model_dir: Directory of the extended model.

    Raises:
        ValueError: if ``model_dir`` is not an existing directory.
    """
    # Model dir
    self._model_dir = os.path.abspath(model_dir)
    if not os.path.isdir(self._model_dir):
        msg = f"{model_dir} doesn't exists'"
        raise ValueError(msg)

    # Extended model, its config and the options stored in it
    self._extended_model = ExtendedModel(model_dir)
    self._config = self._extended_model.config
    self._opts = self._config.opts

    # Get the model options
    model_path = self._opts.models[0]
    checkpoint = torch.load(
        model_path, map_location=lambda storage, loc: storage
    )
    self._model_opts = ArgumentParser.ckpt_model_opts(checkpoint["opt"])
    ArgumentParser.update_model_opts(self._model_opts)
    ArgumentParser.validate_model_opts(self._model_opts)

    # Extract vocabulary
    vocab = checkpoint["vocab"]
    if inputters.old_style_vocab(vocab):
        self._fields = inputters.load_old_vocab(
            vocab, "text", dynamic_dict=False
        )
    else:
        self._fields = vocab

    # Train_steps
    self._train_steps = self._model_opts.train_steps

    # Build the OpenNMT model
    self._opennmt_model = build_base_model(
        self._model_opts,
        self._fields,
        use_gpu(self._opts),
        checkpoint,
        self._opts.gpu,
    )

    def _option(name, default):
        """Return option ``name`` from ``self._opts``, or ``default`` if absent."""
        # Only a missing option falls back to the default; unrelated errors
        # (and KeyboardInterrupt) are no longer swallowed.
        try:
            return getattr(self._opts, name)
        except (AttributeError, KeyError):
            return default

    # Translator
    min_length = _option("min_length", 0)
    max_length = _option("max_length", 100)
    beam_size = _option("beam_size", 5)
    replace_unk = _option("replace_unk", 0)

    self._translator = Translator(
        self._opennmt_model,
        self._fields,
        TextDataReader(),
        TextDataReader(),
        gpu=self._opts.gpu,
        min_length=min_length,
        max_length=max_length,
        beam_size=beam_size,
        replace_unk=replace_unk,
        copy_attn=self._model_opts.copy_attn,
        global_scorer=GNMTGlobalScorer(0.0, -0.0, "none", "none"),
        seed=self.SEED,
    )

    online_learning = self._config.online_learning
    if online_learning:
        # Optim: an ad-hoc attribute bag carrying the options read by
        # Optimizer.from_opt.
        optimizer_opt = type("", (), {})()
        optimizer_opt.optim = "sgd"
        optimizer_opt.learning_rate = self._opts.learning_rate
        optimizer_opt.train_from = ""
        optimizer_opt.adam_beta1 = 0
        optimizer_opt.adam_beta2 = 0
        optimizer_opt.model_dtype = "fp32"
        optimizer_opt.decay_method = "none"
        optimizer_opt.start_decay_steps = 100000
        optimizer_opt.learning_rate_decay = 1.0
        optimizer_opt.decay_steps = 100000
        optimizer_opt.max_grad_norm = 5
        self._optim = Optimizer.from_opt(
            self._opennmt_model, optimizer_opt, checkpoint=None
        )

        # Trainer options, built the same way.
        trainer_opt = type("", (), {})()
        trainer_opt.lambda_coverage = 0.0
        trainer_opt.copy_attn = False
        trainer_opt.label_smoothing = 0.0
        trainer_opt.truncated_decoder = 0
        trainer_opt.model_dtype = "fp32"
        trainer_opt.max_generator_batches = 32
        trainer_opt.normalization = "sents"
        trainer_opt.accum_count = [1]
        trainer_opt.accum_steps = [0]
        trainer_opt.world_size = 1
        trainer_opt.average_decay = 0
        trainer_opt.average_every = 1
        trainer_opt.dropout = 0
        trainer_opt.dropout_steps = (0,)
        trainer_opt.gpu_verbose_level = 0
        trainer_opt.early_stopping = 0
        trainer_opt.early_stopping_criteria = (None,)
        trainer_opt.tensorboard = False
        trainer_opt.report_every = 50
        # -1 is treated as "no GPU": gpu_ranks then stays empty.
        trainer_opt.gpu_ranks = []
        if self._opts.gpu != -1:
            trainer_opt.gpu_ranks = [self._opts.gpu]

        self._trainer = build_trainer(
            trainer_opt,
            self._opts.gpu,
            self._opennmt_model,
            self._fields,
            self._optim,
        )
    else:
        self._trainer = None
def __init__(self, model_dir):
    """Load the model under ``model_dir`` and build its translator and trainer.

    Args:
        model_dir: Directory of the extended model.

    Raises:
        ValueError: if ``model_dir`` is not an existing directory.

    Sets ``_model_dir``, ``_extended_model``, ``_config``, ``_opts``,
    ``_model_opts``, ``_train_steps``, ``_fields``, ``_model``,
    ``translator``, ``_optim`` and ``trainer``.
    """
    self._model_dir = os.path.abspath(model_dir)
    if not os.path.isdir(self._model_dir):
        raise ValueError(f"{model_dir} doesn't exists'")

    # Extended model -> config -> options
    self._extended_model = ExtendedModel(model_dir)
    self._config = self._extended_model.config
    self._opts = self._config.opts
    opts = self._opts

    # Checkpoint and the model options stored in it
    ckpt = torch.load(
        opts.models[0], map_location=lambda storage, loc: storage
    )
    model_opts = ArgumentParser.ckpt_model_opts(ckpt['opt'])
    self._model_opts = model_opts
    ArgumentParser.update_model_opts(model_opts)
    ArgumentParser.validate_model_opts(model_opts)

    self._train_steps = model_opts.train_steps

    # Vocabulary: old-style vocabularies are converted, newer ones are
    # used as stored.
    vocab = ckpt['vocab']
    if not inputters.old_style_vocab(vocab):
        fields = vocab
    else:
        fields = inputters.load_old_vocab(
            vocab, opts.data_type, dynamic_dict=model_opts.copy_attn
        )
    self._fields = fields

    # Model
    model = build_base_model(model_opts, fields, use_gpu(opts), ckpt, opts.gpu)
    self._model = model
    if opts.fp32:
        model.float()

    # Translator
    global_scorer = GNMTGlobalScorer.from_opt(opts)
    self.translator = OnmtxTranslator.from_opt(
        model,
        fields,
        opts,
        model_opts,
        global_scorer=global_scorer,
        out_file=None,
        report_score=False,
        logger=None,
    )

    # Trainer; the optimizer is restored from the same checkpoint.
    self._optim = Optimizer.from_opt(model, opts, checkpoint=ckpt)
    # TODO Handle GPU: the trainer is always built with device id -1.
    self.trainer = build_trainer(opts, -1, model, fields, self._optim)