def load_test_model(opt, dummy_opt):
    """Load a checkpoint for inference and build the matching model.

    Dispatches between variational multi-modal, plain multi-modal and
    text-only model constructors based on ``opt.multimodal_model_type``.
    Returns (fields, model, model_opt).
    """
    ckpt = torch.load(opt.model, map_location=lambda storage, loc: storage)
    fields = onmt.io.load_fields_from_vocab(ckpt['vocab'],
                                            data_type=opt.data_type)
    model_opt = ckpt['opt']
    # Back-fill options added to the code base after this model was trained.
    for key in dummy_opt:
        if key not in model_opt:
            model_opt.__dict__[key] = dummy_opt[key]
    if 'multimodal_model_type' in opt:
        if opt.multimodal_model_type in MODEL_TYPES:
            print('Building variational multi-modal model...')
            model = make_vi_model_mmt(model_opt, fields, use_gpu(opt), ckpt)
        else:
            print('Building multi-modal model...')
            model = make_base_model_mmt(model_opt, fields, use_gpu(opt), ckpt)
    else:
        print('Building text-only model...')
        model = make_base_model(model_opt, fields, use_gpu(opt), ckpt)
    model.eval()
    model.generator.eval()
    return fields, model, model_opt
def get_generator(model, vocab, scorer, args, model_args):
    """Build the utterance generator: a sampler or a beam-search generator.

    When ``args.sample`` is set, picks LFSampler for the 'lf2lf' model and
    the plain Sampler otherwise; else builds a beam-search Generator.
    """
    from onmt.Utils import use_gpu
    on_gpu = use_gpu(args)
    if args.sample:
        # Dispatch on the model type via a class variable rather than
        # duplicating the constructor call.
        sampler_cls = LFSampler if model_args.model == 'lf2lf' else Sampler
        generator = sampler_cls(model, vocab, args.temperature,
                                max_length=args.max_length, cuda=on_gpu)
    else:
        generator = Generator(model, vocab, beam_size=args.beam_size,
                              n_best=args.n_best,
                              max_length=args.max_length,
                              global_scorer=scorer, cuda=on_gpu,
                              min_length=args.min_length)
    return generator
def __init__(self, opt, dummy_opt=None):
    """Load a checkpointed model and prepare it for inference.

    Args:
        opt: runtime options; ``opt.model`` is the checkpoint path,
            ``opt.alpha``/``opt.beta`` are the length/coverage penalties.
        dummy_opt (dict, optional): default model arguments, possibly
            added to the code base since training; back-filled onto the
            checkpoint's saved options.
    """
    # Fix: the default used to be a shared mutable ``{}`` (classic
    # mutable-default-argument pitfall); use None and build a fresh dict.
    if dummy_opt is None:
        dummy_opt = {}
    # Add in default model arguments, possibly added since training.
    self.opt = opt
    checkpoint = torch.load(opt.model,
                            map_location=lambda storage, loc: storage)
    self.fields = onmt.IO.load_fields(checkpoint['vocab'])
    model_opt = checkpoint['opt']
    for arg in dummy_opt:
        if arg not in model_opt:
            model_opt.__dict__[arg] = dummy_opt[arg]
    self._type = model_opt.encoder_type
    self.copy_attn = model_opt.copy_attn
    self.model = onmt.ModelConstructor.make_base_model(
        model_opt, self.fields, use_gpu(opt), checkpoint)
    self.model.eval()
    self.model.generator.eval()
    # Length + Coverage Penalty
    self.alpha = opt.alpha
    self.beta = opt.beta
    # for debugging
    self.beam_accum = None
def make_loss_compute(model, tgt_vocab, opt, train=True):
    """
    This returns user-defined LossCompute object, which is used to
    compute loss in train/validate process. You can implement your
    own *LossCompute class, by subclassing LossComputeBase.
    """
    # Label smoothing is only applied while training.
    smoothing = opt.label_smoothing if train else 0.0
    if opt.copy_attn:
        compute = onmt.modules.CopyGeneratorLossCompute(
            model.generator, tgt_vocab, opt.copy_attn_force,
            opt.copy_loss_by_seqlength)
    elif opt.ReWE:
        # ReWE variant: pass the auxiliary generator and decoder embeddings.
        compute = onmt.Loss.NMTLossCompute(
            model.generator, tgt_vocab,
            label_smoothing=smoothing,
            emb_ReWE=opt.ReWE,
            generator_ReWE=model.generator_ReWE,
            dec_embeddings=model.decoder.embeddings,
            lambda_loss=opt.lambda_loss,
            ReWE_loss=opt.ReWE_loss,
            only_ReWE=opt.contrastive_B)
    else:
        compute = onmt.Loss.NMTLossCompute(
            model.generator, tgt_vocab,
            label_smoothing=smoothing,
            emb_ReWE=opt.ReWE)
    if use_gpu(opt):
        compute.cuda()
    return compute
def evaluate(self, opt, model_opt, data, split='test'):
    """Run generation over one data split and accumulate score statistics.

    Args:
        opt: runtime options (temperature, max_length, verbose, ...).
        model_opt: the options the model was trained with.
        data: data source exposing ``generator(split, shuffle)``.
        split (str): which split to evaluate, default 'test'.
    """
    text_generator = FBnegSampler(self.model, self.mappings['tgt_vocab'],
                                  opt.temperature, opt.max_length,
                                  use_gpu(opt))
    # Statistics
    counter = count(1)
    pred_score_total, pred_words_total = 0, 0
    gold_score_total, gold_words_total = 0, 0

    data_iter = data.generator(split, shuffle=False)
    # Fix: was ``data_iter.next()`` — Python 2 only; the built-in next()
    # works on both Python 2 and 3 iterators.
    num_batches = next(data_iter)
    dec_state = None
    for batch in data_iter:
        # A None batch marks a dialogue boundary: reset decoder state.
        if batch is None:
            dec_state = None
            continue
        elif not self.model.stateful:
            dec_state = None
        enc_state = dec_state.hidden if dec_state is not None else None

        batch_data = text_generator.generate_batch(
            batch, model_opt.model, gt_prefix=self.gt_prefix,
            enc_state=enc_state)
        utterances = self.builder.from_batch(batch_data)
        selections = batch_data["selections"]

        for i, response in enumerate(utterances):
            pred_score_total += response.pred_scores[0]
            pred_words_total += len(response.pred_sents[0])
            gold_score_total += response.gold_score
            gold_words_total += len(response.gold_sent)

        if opt.verbose:
            counter = self.print_results(model_opt, batch, counter,
                                         selections, utterances)
def make_loss_compute(model, tgt_vocab, dataset, opt, validation=False):
    """
    This returns user-defined LossCompute object, which is used to
    compute loss in train/validate process. You can implement your
    own *LossCompute class, by subclassing LossComputeBase.
    """
    # only use label smoothing / regularization for training
    # (i.e. validation=False)
    if opt.copy_attn:
        if opt.KL_aux_MT_smoothing:
            # Fix: message previously read 'ot supported' (typo).
            raise NotImplementedError(
                'label smoothing not supported for copy_attn')
        compute = onmt.modules.CopyGeneratorLossCompute(
            model.generator, tgt_vocab, dataset, opt.copy_attn_force)
    elif opt.KL_aux_MT_smoothing and not validation:
        print('using aux NMT label smoothing, eps=', opt.KL_smoothing_epsilon)
        compute = onmt.Loss.NMTKLDivNMTLossCompute(
            model.generator, tgt_vocab,
            smoothing_epsilon=opt.KL_smoothing_epsilon,
            aux_checkpoint=opt.KL_aux_MT_smoothing)
    else:
        compute = onmt.Loss.NMTLossCompute(model.generator, tgt_vocab)
    if use_gpu(opt):
        print('calling compute.cuda()')
        sys.stdout.flush()
        compute.cuda()
        print('done with compute.cuda()')
        sys.stdout.flush()
    return compute
def make_loss_compute(model, tgt_vocab, opt, train=True):
    """
    This returns user-defined LossCompute object, which is used to
    compute loss in train/validate process. You can implement your
    own *LossCompute class, by subclassing LossComputeBase.
    """
    if opt.copy_attn:
        compute = onmt.modules.CopyGeneratorLossCompute(
            model.generator, tgt_vocab, opt.copy_attn_force,
            opt.copy_loss_by_seqlength)
    elif train and opt.type_weighting_loss is not None:
        # Words listed in the file get double weight in the loss.
        with open(opt.type_weighting_loss, 'r') as fobj:
            weighted = set(line.strip() for line in fobj)
        weights = [2. if word in weighted else 1. for word in tgt_vocab.itos]
        compute = onmt.Loss.TypeWeightingLossCompute(
            model.generator, tgt_vocab,
            tgt_vocab_weights=weights,
            label_smoothing=opt.label_smoothing)
    else:
        compute = onmt.Loss.NMTLossCompute(
            model.generator, tgt_vocab,
            label_smoothing=opt.label_smoothing if train else 0.0)
    if use_gpu(opt):
        compute.cuda()
    return compute
def make_encoder(opt, embeddings):
    """
    Various encoder dispatcher function.
    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.
    """
    enc_type = opt.encoder_type
    if enc_type == "transformer":
        return TransformerEncoder(opt.enc_layers, opt.rnn_size,
                                  opt.dropout, embeddings)
    if enc_type == "cnn":
        return CNNEncoder(opt.enc_layers, opt.rnn_size,
                          opt.cnn_kernel_width, opt.dropout, embeddings)
    if enc_type == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)
    # "rnn" or "brnn"
    return RNNEncoder(opt.rnn_type, opt.brnn, opt.enc_layers,
                      opt.rnn_size, opt.dropout, embeddings, opt.bridge,
                      opt.elmo, opt.elmo_size, opt.elmo_options,
                      opt.elmo_weight,
                      opt.subword_elmo, opt.subword_elmo_size,
                      opt.subword_elmo_options, opt.subword_weight,
                      opt.subword_spm_model,
                      opt.node2vec, opt.node2vec_emb_size,
                      opt.node2vec_weight, use_gpu(opt))
def load_semi_sup_test_model(opt, dummy_opt):
    """Load a semi-supervised audio+text checkpoint and build both models.

    Returns (fields, text_fields, model, text_model, model_opt).
    """
    ckpt = torch.load(opt.model, map_location=lambda storage, loc: storage)
    fields = onmt.io.load_fields_from_vocab(ckpt['vocab'],
                                            data_type=opt.data_type)
    text_fields = onmt.io.load_fields_from_vocab(torch.load(opt.vocab),
                                                 "text")
    # The text model shares the target vocabulary with the audio model.
    text_fields['tgt'] = fields['tgt']
    print(' * vocabulary size. source = %d; target = %d' %
          (len(text_fields['src'].vocab), len(text_fields['tgt'].vocab)))
    model_opt = ckpt['opt']
    for key in dummy_opt:
        if key not in model_opt:
            model_opt.__dict__[key] = dummy_opt[key]
    model, text_model, _ = make_audio_text_model(
        model_opt, fields, text_fields, use_gpu(opt), ckpt)
    for net in (model, text_model):
        net.eval()
        net.generator.eval()
    return fields, text_fields, model, text_model, model_opt
def load_test_model(model_path, opt, dummy_opt):
    """Load a dialogue model checkpoint plus its vocab mappings.

    Returns (mappings, model, model_opt).
    """
    ckpt = torch.load(model_path, map_location=lambda storage, loc: storage)
    model_opt = ckpt['opt']
    for key in dummy_opt:
        if key not in model_opt:
            model_opt.__dict__[key] = dummy_opt[key]
    # Older checkpoints predate these flags; default them to False.
    for flag in ["share_embeddings", "stateful"]:
        if not hasattr(model_opt, flag):
            model_opt.__dict__[flag] = False

    # TODO: fix this
    if model_opt.stateful and not opt.sample:
        raise ValueError(
            'Beam search generator does not work with stateful models yet')

    mappings = read_pickle('{}/vocab.pkl'.format(model_opt.mappings))
    mappings = make_model_mappings(model_opt.model, mappings)

    model = make_base_model(model_opt, mappings, use_gpu(opt), ckpt)
    model.eval()
    model.generator.eval()
    return mappings, model, model_opt
def make_loss_compute(model, tgt_vocab, opt, train=True, fields=None):
    """
    This returns user-defined LossCompute object, which is used to
    compute loss in train/validate process. You can implement your
    own *LossCompute class, by subclassing LossComputeBase.
    """
    # NOTE(review): this block was recovered from a flattened source line;
    # the nesting below (in particular the early return in the RL branch
    # and the final cuda/return) is the most plausible reading — confirm
    # against the original repository.
    if opt.copy_attn:
        # Copy attention: pick RL / hybrid / plain copy-generator loss.
        if opt.obj_f == "rl" and train:
            compute = onmt.modules.RLGeneratorLossCompute(
                model.generator, tgt_vocab, opt.copy_attn_force,
                opt.copy_loss_by_seqlength, opt.copy_attn)
        elif opt.obj_f == "hybrid" and train:
            # need to add copy info
            compute = onmt.modules.HybridLossCompute(
                model.generator, tgt_vocab, opt.copy_attn_force,
                opt.copy_loss_by_seqlength, opt.apply_factor)
        else:
            compute = onmt.modules.CopyGeneratorLossCompute(
                model.generator, tgt_vocab, opt.copy_attn_force,
                opt.copy_loss_by_seqlength)
    else:
        # idf class weigths
        if opt.obj_f == "ml" and opt.idf_class_weights and fields is not None:
            # Weight each target word by (revised) inverse document frequency.
            idf = onmt.modules.Idf(revision_num=opt.idf_revision_num)
            words = [fields["tgt"].vocab.itos[i]
                     for i in range(len(fields["tgt"].vocab))]
            words_df_weights = idf.get_idf_weights(
                None, words,
                revision=False if opt.idf_revision_num == 0 else True)
        elif opt.obj_f == "rl" and train:
            # RL objective without copy attention: build, move to GPU and
            # return immediately.
            compute = onmt.modules.RLGeneratorLossCompute(
                model.generator, tgt_vocab, opt.copy_attn_force,
                opt.copy_loss_by_seqlength, opt.copy_attn)
            if use_gpu(opt):
                compute.cuda()
            return compute
        else:
            words_df_weights = None
        compute = onmt.Loss.NMTLossCompute(
            model.generator, tgt_vocab,
            label_smoothing=opt.label_smoothing if train else 0.0,
            initial_weight=words_df_weights)
    if use_gpu(opt):
        compute.cuda()
    return compute
def __init__(self, opt, dummy_opt=None):
    """Load a checkpoint and build either an ensemble or a single model.

    Args:
        opt: runtime options; ``opt.model`` is the checkpoint path and
            ``opt.ensemble_fixed_idx`` optionally pins a single member.
        dummy_opt (dict, optional): default model arguments, possibly
            added to the code base since training.
    """
    # Fix: the default used to be a shared mutable ``{}`` (mutable
    # default argument pitfall); use None and create a fresh dict.
    if dummy_opt is None:
        dummy_opt = {}
    # Add in default model arguments, possibly added since training.
    self.opt = opt
    checkpoint = torch.load(opt.model,
                            map_location=lambda storage, loc: storage)
    self.fields = onmt.IO.ONMTDataset.load_fields(checkpoint['vocab'])
    model_opt = checkpoint['opt']
    for arg in dummy_opt:
        if arg not in model_opt:
            model_opt.__dict__[arg] = dummy_opt[arg]
    self._type = model_opt.encoder_type
    self.copy_attn = model_opt.copy_attn
    self.ensemble = model_opt.ensemble
    self.fix_id = opt.ensemble_fixed_idx
    if self.ensemble:
        print("Building Ensemble...")
        models = []
        for i in range(model_opt.ensemble_num):
            print("Building Model {}".format(i + 1))
            models.append(
                onmt.ModelConstructor.make_base_model(
                    model_opt, self.fields, use_gpu(opt), checkpoint=None))
        self.model = onmt.Models.Ensemble(models)
        self.model.load_state_dict(checkpoint['model'])
        # Fix use only one model
        if self.fix_id > -1:
            print("USE ONLY MODEL {}".format(self.fix_id))
            self.model = self.model.models[self.fix_id]
            # NOTE(review): recovered from a flattened source line — the
            # explicit device placement below may belong one level up;
            # confirm against the original repository.
            if use_gpu(opt):
                self.model.cuda()
            else:
                self.model.cpu()
    else:
        self.model = onmt.ModelConstructor.make_base_model(
            model_opt, self.fields, use_gpu(opt), checkpoint)
    self.model.generator.eval()
    self.model.eval()
    # for debugging
    self.beam_accum = None
def build_model(model_opt, opt, fields, checkpoint):
    """Construct the model; wrap it in DataParallel for multi-GPU training."""
    print('Building model...')
    net = onmt.ModelConstructor.make_base_model(model_opt, fields,
                                                use_gpu(opt), checkpoint)
    if len(opt.gpuid) > 1:
        print('Multi gpu training: ', opt.gpuid)
        net = nn.DataParallel(net, device_ids=opt.gpuid, dim=1)
    print(net)
    return net
def load_model(opt):
    """Restore a model and its fields from the checkpoint at ``opt.model``."""
    ckpt = torch.load(opt.model, map_location=lambda storage, loc: storage)
    vocab_fields = onmt.IO.load_fields(ckpt['vocab'])
    net = onmt.ModelConstructor.make_base_model(ckpt['opt'], vocab_fields,
                                                use_gpu(opt), ckpt)
    return net, vocab_fields
def load_test_model(opt, dummy_opt):
    """Load a test-time model, choosing the RL or base constructor."""
    ckpt = torch.load(opt.model, map_location=lambda storage, loc: storage)
    fields = onmt.io.load_fields_from_vocab(ckpt['vocab'],
                                            data_type=opt.data_type)
    model_opt = ckpt['opt']
    for key in dummy_opt:
        if key not in model_opt:
            model_opt.__dict__[key] = dummy_opt[key]
    # Pick the constructor first, then build once.
    builder = make_base_model if opt.RL_algorithm is None else make_RL_model
    model = builder(model_opt, fields, use_gpu(opt), ckpt)
    model.eval()
    model.generator.eval()
    return fields, model, model_opt
def build_multiencoder_model(model_opt, opt, fields_dict):
    """Build a multi-encoder model; parallelize across GPUs if requested."""
    print('Building Model...')
    net = onmt.ModelConstructorForMultiEncoder.make_multiencoder_model(
        model_opt, fields_dict, use_gpu(opt))
    if len(opt.gpuid) > 1:
        print('Multi gpu training: ', opt.gpuid)
        net = nn.DataParallel(net, device_ids=opt.gpuid, dim=1)
    print(net)
    return net
def build_model_mine(self, model_opt, opt, fields, checkpoint, logger):
    """Build the base model; wrap it in DataParallel for multi-GPU runs.

    Args:
        model_opt: options the model was (or will be) trained with.
        opt: runtime options; ``opt.gpuid`` lists the devices.
        fields: vocab fields for model construction.
        checkpoint: optional checkpoint to restore from.
        logger: logger used for progress output.
    """
    logger.info('Building model...')
    model = onmt.ModelConstructor.make_base_model(model_opt, fields,
                                                  use_gpu(opt), checkpoint)
    if len(opt.gpuid) > 1:
        # Fix: logger.info was called with a trailing positional argument
        # but no %s placeholder, which makes the logging module raise a
        # formatting error and drop the gpu list; use lazy %-formatting.
        logger.info('Multi gpu training: %s', opt.gpuid)
        model = nn.DataParallel(model, device_ids=opt.gpuid, dim=1)
    logger.info(model)
    return model
def lazy_dataset_loader(pt_file, corpus_type):
    """Load a serialized dataset onto the configured device.

    Uses the enclosing scope's ``opt`` and ``logger``.
    """
    target_device = 'cpu'
    if use_gpu(opt):
        gpu_index = opt.gpu if hasattr(opt, 'gpu') else opt.gpuid[0]
        target_device = 'cuda:' + str(gpu_index)
    dataset = torch.load(pt_file, map_location=target_device)
    logger.info('Loading %s dataset from %s, number of examples: %d' %
                (corpus_type, pt_file, len(dataset)))
    return dataset
def build_model(model_opt, opt, fields, checkpoint):
    """Build the stage-1/stage-2 model pair; DataParallel if multi-GPU."""
    print('Building model...')
    make = onmt.ModelConstructor.make_base_model
    model1 = make(model_opt, fields, use_gpu(opt), checkpoint, stage1=True)
    model2 = make(model_opt, fields, use_gpu(opt), checkpoint, stage1=False)
    if len(opt.gpuid) > 1:
        print('Multi gpu training: ', opt.gpuid)
        model1 = nn.DataParallel(model1, device_ids=opt.gpuid, dim=1)
        model2 = nn.DataParallel(model2, device_ids=opt.gpuid, dim=1)
    print(model1)
    print(model2)
    return model1, model2
def make_critic_loss_compute(ys, values_fit, values_optim, train=True):
    """
    This returns user-defined LossCompute object, which is used to
    compute loss in train/validate process. You can implement your
    own *LossCompute class, by subclassing LossComputeBase.
    """
    # NOTE(review): ``opt`` is not a parameter here — this reads a
    # module-level ``opt``; confirm it is defined before this is called.
    # ``train`` is currently unused.
    compute = onmt.Loss.DDPGLossComputeCritic(ys, values_fit, values_optim)
    if use_gpu(opt):
        compute.cuda()
    return compute
def learn(self, opt, data, report_func):
    """Train model.

    Runs ``opt.epochs`` epochs of train/validate, steps the learning
    rate on validation perplexity, and periodically checkpoints.

    Args:
        opt(namespace)
        model(Model)
        data(DataGenerator)
    """
    print('\nStart training...')
    print(' * number of epochs: %d' % opt.epochs)
    print(' * batch size: %d' % opt.batch_size)

    for epoch in range(opt.epochs):
        print('')
        # 1. Train for one epoch on the training set.
        train_iter = data.generator('train', cuda=use_gpu(opt))
        train_stats = self.train_epoch(train_iter, opt, epoch, report_func)
        print('Train loss: %g' % train_stats.mean_loss())
        print('Train accuracy: %g' % train_stats.accuracy())

        # 2. Validate on the validation set.
        valid_iter = data.generator('dev', cuda=use_gpu(opt))
        valid_stats = self.validate(valid_iter)
        print('Validation loss: %g' % valid_stats.mean_loss())
        print('Validation accuracy: %g' % valid_stats.accuracy())

        # 3. Log to remote server.
        #if opt.exp_host:
        #    train_stats.log("train", experiment, optim.lr)
        #    valid_stats.log("valid", experiment, optim.lr)
        #if opt.tensorboard:
        #    train_stats.log_tensorboard("train", writer, optim.lr, epoch)
        #    train_stats.log_tensorboard("valid", writer, optim.lr, epoch)

        # 4. Update the learning rate
        self.epoch_step(valid_stats.ppl(), epoch)

        # 5. Drop a checkpoint if needed.
        if epoch >= opt.start_checkpoint_at:
            self.drop_checkpoint(opt, epoch, valid_stats)
def load_test_model(opt, dummy_opt, model_path=None):
    """Load an e2e or base model from checkpoint for test-time use."""
    path = opt.model if model_path is None else model_path
    ckpt = torch.load(path, map_location=lambda storage, loc: storage)
    fields = onmt.io.load_fields_from_vocab(ckpt['vocab'],
                                            data_type=opt.data_type)
    model_opt = ckpt['opt']
    for key in dummy_opt:
        if key not in model_opt:
            model_opt.__dict__[key] = dummy_opt[key]
    if opt.data_type == 'e2e':
        model = make_e2e_model(model_opt, fields, use_gpu(opt), ckpt)
        model.src_generator.eval()
        model.tgt_generator.eval()
        # NOTE(review): the e2e path does not call model.eval() — confirm
        # this is intentional.
    else:
        model = make_base_model(model_opt, fields, use_gpu(opt), ckpt)
        model.generator.eval()
        model.eval()
    return fields, model, model_opt
def load_test_model(opt, dummy_opt):
    """Load a test model, with optional character-composition extras."""
    ckpt = torch.load(opt.model, map_location=lambda storage, loc: storage)
    fields = onmt.io.load_fields_from_vocab(
        ckpt['vocab'], data_type=opt.data_type)
    model_opt = ckpt['opt']
    for key in dummy_opt:
        if key not in model_opt:
            model_opt.__dict__[key] = dummy_opt[key]
    if model_opt.use_char_composition:
        # Char composition needs the target-character field plus the
        # precomputed spelling tensor saved next to the data.
        tgt_char_field = fields['tgt_char']
        spelling = torch.load(model_opt.data + '.spelling.pt')
        model = make_base_model(model_opt, fields, use_gpu(opt), ckpt,
                                spelling, tgt_char_field)
    else:
        model = make_base_model(model_opt, fields, use_gpu(opt), ckpt)
    model.eval()
    model.generator.eval()
    return fields, model, model_opt
def build_model(model_opt, opt, mappings, checkpoint):
    """Build the base model from vocab mappings; DataParallel if multi-GPU.

    Args:
        model_opt: options the model was trained with.
        opt: runtime options (``gpuid``, ``verbose``).
        mappings: vocabulary mappings for model construction.
        checkpoint: optional checkpoint to restore from.
    """
    # Fix: was the Python 2 statement `print 'Building model...'`, which is
    # a SyntaxError under Python 3 and inconsistent with the print() calls
    # used in the rest of this function.
    print('Building model...')
    model = model_builder.make_base_model(model_opt, mappings,
                                          use_gpu(opt),
                                          checkpoint=checkpoint)
    if len(opt.gpuid) > 1:
        print('Multi gpu training: ', opt.gpuid)
        model = nn.DataParallel(model, device_ids=opt.gpuid, dim=1)
    if opt.verbose:
        print(model)
    return model
def main():
    """Train a student model under a pre-trained teacher model.

    Relies on module-level ``opt`` and helper functions (load_fields,
    build_model, build_optim, train_model, ...).
    """
    # Load train and validate data.
    print("Loading train and validate data from '%s'" % opt.data)
    train = torch.load(opt.data + '.train.pt')
    valid = torch.load(opt.data + '.valid.pt')
    print(' * number of training sentences: %d' % len(train))
    print(' * maximum batch size: %d' % opt.batch_size)

    # Load checkpoint if we resume from a previous training.
    if opt.train_from:
        print('Loading checkpoint from %s' % opt.train_from)
        checkpoint = torch.load(opt.train_from,
                                map_location=lambda storage, loc: storage)
        model_opt = checkpoint['opt']
        # I don't like reassigning attributes of opt: it's not clear
        opt.start_epoch = checkpoint['epoch'] + 1
    else:
        checkpoint = None
        model_opt = opt

    # Load fields generated from preprocess phase.
    fields = load_fields(train, valid, checkpoint)

    # Collect features.
    src_features = collect_features(train, fields)
    for j, feat in enumerate(src_features):
        print(' * src feature %d size = %d' % (j, len(fields[feat].vocab)))

    # Build model.
    model = build_model(model_opt, opt, fields, checkpoint)
    tally_parameters(model)
    check_save_model_path()

    # Build optimizer.
    optim = build_optim(model, checkpoint)
    print(opt)

    # Do training.
    # The teacher model is loaded frozen (eval mode) and passed into the
    # training loop for distillation.
    teacher_checkpoint = torch.load(opt.teacher_model,
                                    map_location=lambda storage, loc: storage)
    teacher_fields = onmt.IO.ONMTDataset.load_fields(
        teacher_checkpoint['vocab'])
    teacher_model_opt = teacher_checkpoint['opt']
    teacher_model = onmt.ModelConstructor.make_base_model(
        teacher_model_opt, teacher_fields, use_gpu(opt), teacher_checkpoint)
    teacher_model.eval()
    teacher_model.generator.eval()
    print(teacher_model)
    train_model(model, train, valid, fields, optim, teacher_model)
def build_model(model_opt, opt, fields, text_fields, checkpoint):
    """Build the joint audio/text/speech models; DataParallel if multi-GPU.

    Returns (model, text_model, speech_model). Note speech_model is not
    wrapped in DataParallel here.
    """
    print('Building model...')
    model, text_model, speech_model = \
        onmt.ModelConstructor.make_audio_text_model_from_text(
            model_opt, fields, text_fields, use_gpu(opt), checkpoint)
    if len(opt.gpuid) > 1:
        print('Multi gpu training: ', opt.gpuid)
        model = nn.DataParallel(model, device_ids=opt.gpuid, dim=1)
        text_model = nn.DataParallel(text_model, device_ids=opt.gpuid, dim=1)
    print(model)
    # Fix: bare `print` is a Python 2 statement; under Python 3 it is a
    # no-op expression. Call print() to emit the intended blank line.
    print()
    print(text_model)
    print()
    print(speech_model)
    return model, text_model, speech_model
def make_loss_compute(model, tgt_vocab, opt, train=True):
    """
    This returns user-defined LossCompute object, which is used to
    compute loss in train/validate process. You can implement your
    own *LossCompute class, by subclassing LossComputeBase.
    """
    # Label smoothing only applies during training.
    smoothing = opt.label_smoothing if train else 0.0
    loss = onmt.Loss.NMTLossCompute(model.generator, tgt_vocab,
                                    label_smoothing=smoothing)
    if use_gpu(opt):
        loss.cuda()
    return loss
def make_loss_compute(model, tgt_vocab, dataset, opt, teacher_model):
    """
    This returns user-defined LossCompute object, which is used to
    compute loss in train/validate process. You can implement your
    own *LossCompute class, by subclassing LossComputeBase.
    """
    if opt.copy_attn:
        loss = onmt.modules.CopyGeneratorLossCompute(
            model.generator, tgt_vocab, dataset, opt.copy_attn_force)
    else:
        # Distillation-style loss: both student and teacher generators
        # (and their log-softmax layers) are passed in.
        loss = onmt.Loss.NMTLossCompute(model.generator, tgt_vocab,
                                        teacher_model.generator,
                                        model.logsoftmax,
                                        teacher_model.logsoftmax)
    if use_gpu(opt):
        loss.cuda()
    return loss
def __init__(self, args, schema, price_tracker, model_path, timed):
    """Set up a neural dialogue system: load the model, build the
    generator, the preprocessing pipeline, and the shared Env tuple."""
    super(PytorchNeuralSystem, self).__init__()
    self.schema = schema
    self.price_tracker = price_tracker
    self.timed_session = timed

    # TODO: do we need the dummy parser?
    dummy_parser = argparse.ArgumentParser(description='duh')
    options.add_model_arguments(dummy_parser)
    options.add_data_generator_arguments(dummy_parser)
    dummy_args = dummy_parser.parse_known_args([])[0]

    # Load the model.
    mappings, model, model_args = model_builder.load_test_model(
        model_path, args, dummy_args.__dict__)
    self.model_name = model_args.model
    vocab = mappings['utterance_vocab']
    self.mappings = mappings

    generator = get_generator(model, vocab, Scorer(args.alpha), args,
                              model_args)
    builder = UtteranceBuilder(vocab, args.n_best, has_tgt=True)

    preprocessor = Preprocessor(schema, price_tracker,
                                model_args.entity_encoding_form,
                                model_args.entity_decoding_form,
                                model_args.entity_target_form)
    textint_map = TextIntMap(vocab, preprocessor)
    # NOTE(review): under Python 3, ``map`` returns a one-shot iterator;
    # if remove_symbols is iterated more than once downstream it will be
    # empty the second time — confirm, or wrap in list().
    remove_symbols = map(vocab.to_ind, (markers.EOS, markers.PAD))
    use_cuda = use_gpu(args)

    kb_padding = mappings['kb_vocab'].to_ind(markers.PAD)
    dialogue_batcher = DialogueBatcherFactory.get_dialogue_batcher(
        model=self.model_name, kb_pad=kb_padding, mappings=mappings,
        num_context=model_args.num_context)

    # TODO: class variable is not a good way to do this
    Dialogue.preprocessor = preprocessor
    Dialogue.textint_map = textint_map
    Dialogue.mappings = mappings
    Dialogue.num_context = model_args.num_context

    # Bundle everything sessions need into a single immutable Env record.
    Env = namedtuple('Env', ['model', 'vocab', 'preprocessor',
                             'textint_map', 'stop_symbol',
                             'remove_symbols', 'gt_prefix', 'max_len',
                             'dialogue_batcher', 'cuda',
                             'dialogue_generator', 'utterance_builder',
                             'model_args'])
    self.env = Env(model, vocab, preprocessor, textint_map,
                   stop_symbol=vocab.to_ind(markers.EOS),
                   remove_symbols=remove_symbols, gt_prefix=1, max_len=20,
                   dialogue_batcher=dialogue_batcher, cuda=use_cuda,
                   dialogue_generator=generator, utterance_builder=builder,
                   model_args=model_args)
def build_model(model_opt, opt, mappings, checkpoint, model_path=None):
    """Build a fresh model, or reload a full one when model_path is given."""
    print('Building model...')
    if model_path is None:
        model, _ = model_builder.make_base_model(model_opt, mappings,
                                                 use_gpu(opt),
                                                 checkpoint=checkpoint)
    else:
        # Reload a fully trained model (critic is discarded here).
        _mappings, model, model_opt, _critic = model_builder.load_test_model(
            model_path, opt, model_opt.__dict__)
    if len(opt.gpuid) > 1:
        print('Multi gpu training: ', opt.gpuid)
        model = nn.DataParallel(model, device_ids=opt.gpuid, dim=1)
    if opt.verbose:
        print(model)
    return model
def load_test_model(opt, dummy_opt):
    """Standard test-time model loading: checkpoint -> fields -> model."""
    ckpt = torch.load(opt.model, map_location=lambda storage, loc: storage)
    fields = onmt.io.load_fields_from_vocab(
        ckpt['vocab'], data_type=opt.data_type)
    model_opt = ckpt['opt']
    # Back-fill options introduced after this model was trained.
    for key in dummy_opt:
        if key not in model_opt:
            model_opt.__dict__[key] = dummy_opt[key]
    model = make_base_model(model_opt, fields, use_gpu(opt), ckpt)
    model.eval()
    model.generator.eval()
    return fields, model, model_opt
def make_loss_compute(model, tgt_vocab, opt, train=True):
    """
    This returns user-defined LossCompute object, which is used to
    compute loss in train/validate process. You can implement your
    own *LossCompute class, by subclassing LossComputeBase.
    """
    if opt.copy_attn:
        loss = onmt.modules.CopyGeneratorLossCompute(
            model.generator, tgt_vocab, opt.copy_attn_force,
            opt.copy_loss_by_seqlength)
    else:
        loss = onmt.Loss.NMTLossCompute(
            model.generator, tgt_vocab,
            label_smoothing=opt.label_smoothing if train else 0.0)
    if use_gpu(opt):
        loss.cuda()
    return loss
def main():
    """Export encoder/decoder word embeddings from a checkpoint to text.

    Relies on module-level ``parser``, ``opts`` and ``write_embeddings``.
    """
    dummy_parser = argparse.ArgumentParser(description='train.py')
    opts.model_opts(dummy_parser)
    dummy_opt = dummy_parser.parse_known_args([])[0]

    opt = parser.parse_args()
    opt.cuda = opt.gpu > -1
    if opt.cuda:
        torch.cuda.set_device(opt.gpu)

    # Add in default model arguments, possibly added since training.
    checkpoint = torch.load(opt.model,
                            map_location=lambda storage, loc: storage)
    model_opt = checkpoint['opt']
    # NOTE(review): src taken from index 1 and tgt from index 0 looks
    # inverted — confirm against the checkpoint's vocab layout.
    src_dict = checkpoint['vocab'][1][1]
    tgt_dict = checkpoint['vocab'][0][1]

    fields = onmt.io.load_fields_from_vocab(checkpoint['vocab'])
    model_opt = checkpoint['opt']
    for arg in dummy_opt.__dict__:
        if arg not in model_opt:
            model_opt.__dict__[arg] = dummy_opt.__dict__[arg]

    model = onmt.ModelConstructor.make_base_model(
        model_opt, fields, use_gpu(opt), checkpoint)
    encoder = model.encoder
    decoder = model.decoder

    encoder_embeddings = encoder.embeddings.word_lut.weight.data.tolist()
    decoder_embeddings = decoder.embeddings.word_lut.weight.data.tolist()

    print("Writing source embeddings")
    write_embeddings(opt.output_dir + "/src_embeddings.txt", src_dict,
                     encoder_embeddings)
    print("Writing target embeddings")
    write_embeddings(opt.output_dir + "/tgt_embeddings.txt", tgt_dict,
                     decoder_embeddings)
    print('... done.')
    print('Converting model...')