def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the full model (here a VAE-wrapped encoder/decoder) from options.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    src_dict = fields["src"].vocab
    feature_dicts = ONMTDataset.collect_feature_dicts(fields)
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = []
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make the full model (encoder + decoder wrapped in the VAE model).
    model = VaeModel(encoder, decoder, model_opt)

    # Make generator: a plain projection+log-softmax unless copy attention
    # is requested, in which case the copy-aware generator is used.
    if not model_opt.copy_attn:
        generator = Generator(model_opt.rnn_size, len(fields["tgt"].vocab))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder's input embeddings.
            generator.linear.weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            # Fixed message typo ("Intializing").
            print('Initializing parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        model.encoder.embeddings.load_pretrained_vectors(
            model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        model.decoder.embeddings.load_pretrained_vectors(
            model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Assigning the generator as an attribute registers it as a submodule,
    # so its parameters are included in model.parameters().
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(opt, model_opt, fields, checkpoint=None):
    """Build an NMTModel (encoder + decoder + generator) from options.

    Args:
        opt: the option in current environment.
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        checkpoint: the snapshot model.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_vocab = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        feat_pad_ix = [feat_dict.stoi[onmt.IO.PAD_WORD]
                       for feat_dict in feature_dicts]
        num_feat_embeddings = [len(feat_dict)
                               for feat_dict in feature_dicts]
        src_embeddings = build_embeddings(
            model_opt, src_vocab.stoi[onmt.IO.PAD_WORD],
            feat_pad_ix, len(src_vocab), for_encoder=True,
            num_feat_embeddings=num_feat_embeddings)
        encoder = Encoder(model_opt.encoder_type, model_opt.brnn,
                          model_opt.rnn_type, model_opt.enc_layers,
                          model_opt.rnn_size, model_opt.dropout,
                          src_embeddings, model_opt.cnn_kernel_width)
    else:
        encoder = onmt.modules.ImageEncoder(model_opt.layers,
                                            model_opt.brnn,
                                            model_opt.rnn_size,
                                            model_opt.dropout)

    # Make decoder.
    tgt_vocab = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    tgt_embeddings = build_embeddings(
        model_opt, tgt_vocab.stoi[onmt.IO.PAD_WORD], [],
        len(tgt_vocab), for_encoder=False)
    decoder = make_decoder(model_opt.decoder_type, model_opt.rnn_type,
                           model_opt.dec_layers, model_opt.rnn_size,
                           model_opt.input_feed,
                           model_opt.global_attention,
                           model_opt.coverage_attn,
                           model_opt.context_gate,
                           model_opt.copy_attn,
                           model_opt.cnn_kernel_width,
                           model_opt.dropout, tgt_embeddings)

    # Make NMTModel (= encoder + decoder).
    model = onmt.Models.NMTModel(encoder, decoder)

    # Make generator.
    if not model_opt.copy_attn:
        # Explicit dim avoids the deprecated implicit-dim LogSoftmax; the
        # logits here are (batch, vocab), so dim=-1 matches the old default.
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder's input embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = onmt.modules.CopyGenerator(model_opt,
                                               fields["src"].vocab,
                                               fields["tgt"].vocab)

    # Load states from the checkpoint, if given.
    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Decide on the device from whichever GPU option `opt` carries
    # (train uses `gpuid`, translate uses `gpu`).
    if hasattr(opt, 'gpuid'):
        cuda = len(opt.gpuid) >= 1
    elif hasattr(opt, 'gpu'):
        cuda = opt.gpu > -1
    else:
        cuda = False

    if cuda:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    # Assigning the generator as an attribute registers it as a submodule.
    model.generator = generator

    return model
def make_base_model(opt, model_opt, fields, checkpoint=None):
    """Assemble encoder, decoder and generator into the base NMT model.

    Args:
        opt: the option in current environment.
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        checkpoint: the snapshot model.
    """
    # --- Encoder ---
    src_vocab = fields["src"].vocab
    num_feat_embeddings = [
        len(feat_dict)
        for feat_dict in ONMTDataset.collect_feature_dicts(fields)
    ]
    src_embeddings = build_embeddings(
        model_opt, src_vocab.stoi[onmt.IO.PAD_WORD], len(src_vocab),
        for_encoder=True, num_feat_embeddings=num_feat_embeddings)

    if model_opt.model_type == "text":
        encoder = Encoder(model_opt.encoder_type, model_opt.brnn,
                          model_opt.rnn_type, model_opt.enc_layers,
                          model_opt.rnn_size, model_opt.dropout,
                          src_embeddings)
    elif model_opt.model_type == "img":
        encoder = onmt.modules.ImageEncoder(model_opt)
    else:
        assert False, ("Unsupported model type %s" % (model_opt.model_type))

    # --- Decoder ---
    tgt_vocab = fields["tgt"].vocab
    tgt_embeddings = build_embeddings(
        model_opt, tgt_vocab.stoi[onmt.IO.PAD_WORD], len(tgt_vocab),
        for_encoder=False)
    decoder = onmt.Models.Decoder(model_opt, tgt_embeddings)

    # --- Full model (= encoder + decoder) ---
    model = onmt.Models.NMTModel(encoder, decoder)

    # --- Generator ---
    if model_opt.copy_attn:
        generator = onmt.modules.CopyGenerator(model_opt,
                                               fields["src"].vocab,
                                               fields["tgt"].vocab)
    else:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            # Tie output projection to the decoder input embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight

    # Restore saved states when resuming from a checkpoint.
    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Device choice follows whichever GPU option `opt` exposes.
    cuda = False
    if hasattr(opt, 'gpuid'):
        cuda = len(opt.gpuid) >= 1
    elif hasattr(opt, 'gpu'):
        cuda = opt.gpu > -1

    if cuda:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    # Attribute assignment registers the generator as a submodule.
    model.generator = generator
    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Construct the NMT model (encoder, decoder, generator), optionally
    with a second "double encoder" input stream.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu: Boolean: whether to use gpu.
        checkpoint: the snapshot model.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)

        if model_opt.encoder_type == "double_encoder":
            # Second ("intermediate") input stream for the double encoder.
            inter_dict = fields['inter'].vocab
            # NOTE(review): this reuses the same feature dicts collected
            # for the src stream — confirm that is intended for 'inter'.
            inter_feature_dicts = ONMTDataset.collect_feature_dicts(fields)
            inter_embeddings = make_embeddings(model_opt, inter_dict,
                                               inter_feature_dicts,
                                               for_encoder=False,
                                               for_encoder_int=True)
            encoder = make_encoder(model_opt, src_embeddings,
                                   inter_embeddings)
        else:
            encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, [],
                                     for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel (= encoder + decoder); the double-encoder variant
    # gets its own model class.
    if model_opt.encoder_type == "double_encoder":
        model = DoubleEncNMTModel(encoder, decoder)
    else:
        model = NMTModel(encoder, decoder)

    # Make generator.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)
    else:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            # Tie output projection to the decoder input embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight

    # Load the model states from checkpoint.
    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Attribute assignment registers the generator as a submodule.
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model