def audiomodel_forward(self, opt, tgt_l=2, bsize=1, t=37):
    """
    Creates a speech-to-text nmtmodel with a custom opt function.
    Forwards a testbatch and checks output size.

    Args:
        opt: Namespace with options
        tgt_l: length of the target sequence
        bsize: batchsize
        t: number of audio time steps in the test batch
    """
    # Audio input only works with the RNN encoder; skip the check for
    # encoder types that do not apply.
    if opt.encoder_type == 'transformer' or opt.encoder_type == 'cnn':
        return
    word_dict = self.get_vocab()
    feature_dicts = []

    enc = AudioEncoder(opt.enc_layers, opt.brnn, opt.rnn_size,
                       opt.dropout, opt.sample_rate, opt.window_size)

    embeddings = make_embeddings(opt, word_dict, feature_dicts,
                                 for_encoder=False)
    dec = make_decoder(opt, embeddings)

    model = onmt.Models.NMTModel(enc, dec)

    test_src, test_tgt, test_length = self.get_batch_audio(
        bsize=bsize, sample_rate=opt.sample_rate,
        window_size=opt.window_size, t=t, tgt_l=tgt_l)
    outputs, attn, _ = model(test_src, test_tgt, test_length)
    # Decoder output covers tgt_l - 1 steps (the last target token is
    # never fed as input), one vector of rnn_size per batch element.
    outputsize = torch.zeros(tgt_l - 1, bsize, opt.rnn_size)
    # Make sure that output has the correct size and type
    self.assertEqual(outputs.size(), outputsize.size())
    self.assertEqual(type(outputs), torch.autograd.Variable)
    self.assertEqual(type(outputs.data), torch.FloatTensor)
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build an NMT model (optionally a variational one with an inference
    network) from the given options.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel (or ViNMTModel when an inference network is used).
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        src_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         src_feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    tgt_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     tgt_feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        # NOTE(review): src_dict/src_embeddings only exist on the "text"
        # path above -- presumably share_embeddings is text-only; verify.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make inference network.
    inference_network = make_inference_network(
        model_opt, src_embeddings, tgt_embeddings,
        src_dict, src_feature_dicts, tgt_dict, tgt_feature_dicts
    ) if model_opt.inference_network_type != "none" else None

    # NOTE(review): "bnshare" dereferences inference_network, so it
    # requires inference_network_type != "none" -- verify option parsing
    # enforces that combination.
    if model_opt.prior_normalization == "bnshare":
        decoder.attn.bn_mu = inference_network.bn_mu
        decoder.attn.bn_std = inference_network.bn_std

    # Make NMTModel(= encoder + decoder + inference network).
    model = (NMTModel(encoder, decoder, None, "none")
             if inference_network is None
             else ViNMTModel(encoder, decoder, inference_network,
                             dist_type=model_opt.dist_type,
                             use_prior=model_opt.use_prior > 0))
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        # strict=False tolerates checkpoints whose parameter set differs
        # from the current model (e.g. resuming into a variational model).
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model_mmt(model_opt, fields, gpu, checkpoint=None):
    """
    Build a multimodal NMT model (text encoder + image encoder + decoder).

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the Multimodal NMT model.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        if model_opt.multimodal_model_type in ['imgd', 'imge', 'src+img']:
            encoder = make_encoder(model_opt, src_embeddings)
        elif model_opt.multimodal_model_type == 'imgw':
            # model ImgW uses a specific source-language encoder
            encoder = RNNEncoderImageAsWord(model_opt.rnn_type,
                                            model_opt.brnn,
                                            model_opt.enc_layers,
                                            model_opt.rnn_size,
                                            model_opt.dropout,
                                            src_embeddings)
        else:
            raise Exception("Multi-modal model type not implemented: %s"
                            % model_opt.multimodal_model_type)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    if model_opt.multimodal_model_type == 'src+img':
        # use the local image features "as is": encoder only reshapes them
        encoder_image = make_encoder_image_local_features(model_opt)
    else:
        # transform global image features before using them
        encoder_image = make_encoder_image_global_features(model_opt)

    # Make the multimodal model (= encoder + decoder + image encoder).
    if model_opt.multimodal_model_type == 'imgd':
        model = NMTImgDModel(encoder, decoder, encoder_image)
    elif model_opt.multimodal_model_type == 'imge':
        model = NMTImgEModel(encoder, decoder, encoder_image)
    elif model_opt.multimodal_model_type == 'imgw':
        model = NMTImgWModel(encoder, decoder, encoder_image)
    elif model_opt.multimodal_model_type == 'src+img':
        # using image encoder only to reshape local features
        model = NMTSrcImgModel(encoder, decoder, encoder_image)
    else:
        raise Exception("Multi-modal model type not yet implemented: %s"
                        % (model_opt.multimodal_model_type))
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            # Explicit dim: implicit-dim LogSoftmax is deprecated; for the
            # 2D (flattened) generator input this matches the old behavior.
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None, stage1=True,
                    basic_enc_dec=False):
    """
    Build a two-stage NMT model over the staged field sets.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        stage1(bool): select the stage-1 field set ("src1"/"tgt1").
        basic_enc_dec(bool): build a basic encoder-decoder over the
                             stage-2 fields ("src2"/"tgt2").
    Returns:
        the NMTModel.
    """
    # NOTE(review): stage1 without basic_enc_dec is explicitly disallowed;
    # the assignments following the assert are unreachable dead code.
    if stage1 and not basic_enc_dec:
        assert False
        src = "src1"
        tgt = "tgt1"
    else:
        src = "src2"
        tgt = "tgt2"
    # NOTE(review): src_hist is None when neither flag is set; the text
    # branch below would then crash on fields[None] -- verify callers
    # always pass stage1 or basic_enc_dec for text models.
    src_hist = "src1_hist" if (basic_enc_dec or stage1) else None
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields[src].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, src)
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts,
                                         hist_dict=fields[src_hist].vocab,
                                         use_hier_hist=True)
        encoder = make_encoder(model_opt, src_embeddings, stage1,
                               basic_enc_dec)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields[tgt].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, tgt)
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings,
                           stage1 and not basic_enc_dec, basic_enc_dec)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator: this variant always uses the copy generator over the
    # stage-2 target vocabulary.
    generator = CopyGenerator(model_opt.rnn_size,
                              fields["tgt2"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None, stage1=True):
    """
    Build a two-stage table-to-text NMT model.

    Stage 1 uses the "src1"/"tgt1" fields and a plain softmax generator;
    stage 2 uses "src2"/"tgt2" and a copy generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        stage1(bool): build the stage-1 model when True, stage-2 otherwise.
    Returns:
        the NMTModel.
    """
    if stage1:
        src = "src1"
        tgt = "tgt1"
    else:
        src = "src2"
        tgt = "tgt2"
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields[src].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, src)
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        # Table embeddings drop the word lookup (discard_word=True) but
        # reuse the same underlying weights as the source embeddings.
        table_embeddings = make_embeddings(model_opt, src_dict,
                                           feature_dicts,
                                           discard_word=True)
        table_embeddings.word_lut.weight = src_embeddings.word_lut.weight
        table_embeddings.field_lut.weight = src_embeddings.field_lut.weight
        table_embeddings.type_lut.weight = src_embeddings.type_lut.weight
        table_embeddings.ha_lut.weight = src_embeddings.ha_lut.weight
        encoder = make_encoder(model_opt,
                               (src_embeddings, table_embeddings), stage1)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields[tgt].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, tgt)
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, stage1)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if stage1:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt1"].vocab)),
            # Explicit dim: implicit-dim LogSoftmax is deprecated; for the
            # 2D (flattened) generator input this matches the old behavior.
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt2"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None, spelling=None,
                    tgt_char_field=None):
    """
    Build an NMT model with optional character-composed target embeddings.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        spelling: word-spelling table for the character composition model
                  (required unless use_char_composition == 'None').
        tgt_char_field: character-level target `Field` (same condition).
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    # The word representer composes word vectors from character spellings;
    # it is shared between the decoder embeddings and the generator below.
    if model_opt.use_char_composition == 'None':
        word_representer = None
    else:
        word_representer = WordRepresenter(
            spelling, len(tgt_char_field.vocab),
            tgt_char_field.vocab.stoi[onmt.io.PAD_WORD],
            model_opt.tgt_word_vec_size,
            char_composition=model_opt.use_char_composition)
        if gpu:
            word_representer.init_cuda()
    if model_opt.use_char_composition == 'None':
        tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                         feature_dicts, for_encoder=False)
    else:
        tgt_embeddings = VarEmbedding(word_representer,
                                      model_opt.tgt_word_vec_size)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        # NOTE(review): with char composition, tgt_embeddings is a
        # VarEmbedding and presumably has no word_lut -- verify that
        # share_embeddings is not combined with char composition.
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        if model_opt.use_char_composition == 'None':
            generator = nn.Sequential(
                nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
                nn.LogSoftmax(dim=-1))
        else:
            # VarLinear ties the output projection to the same
            # character-composed word representations.
            generator = nn.Sequential(
                VarLinear(word_representer),
                nn.LogSoftmax(dim=-1))
            pass
        # NOTE(review): with VarLinear, generator[0] presumably has no
        # .weight to tie -- verify share_decoder_embeddings is only used
        # without char composition.
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        # Pretrained target vectors only apply to standard lookup
        # embeddings, not to the character-composed VarEmbedding.
        if hasattr(model.decoder, 'embeddings') and \
                model_opt.use_char_composition == 'None':
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build an NMT model, optionally multimodal (memory-bank gate, image-as-
    word encoder, or capsule decoder) depending on multimodal_model_type.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel (or MultiModalNMTModel).
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Older checkpoints may lack the multimodal options entirely, hence
    # the AttributeError fallbacks to text-only defaults.
    try:
        mmod_generator = 'generator' in model_opt.multimodal_model_type
        mmod_bank = 'bank' in model_opt.multimodal_model_type
        mmod_imgw = 'imgw' in model_opt.multimodal_model_type
        mmod_dcap = 'dcap' in model_opt.multimodal_model_type
        mmod_model = mmod_bank or mmod_imgw or mmod_dcap
    except AttributeError:
        mmod_generator = False
        mmod_bank = False
        mmod_imgw = False
        mmod_model = False
        mmod_dcap = False
    try:
        mmod_generator_add = model_opt.mmod_generator_add
    except AttributeError:
        mmod_generator_add = 0.0
    try:
        mmod_use_hidden = model_opt.mmod_use_hidden
    except AttributeError:
        mmod_use_hidden = False

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings,
                               mmod_imgw=mmod_imgw)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(
        model_opt, tgt_dict, feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, mmod_dcap)

    # Make Model
    if mmod_model:
        if mmod_bank:
            bridge = multimodal.MultiModalMemoryBankGate(
                model_opt.rnn_size, model_opt.img_feat_dim,
                add=mmod_generator_add)
        else:
            bridge = None
        model = multimodal.MultiModalNMTModel(
            encoder, bridge, decoder,
            imgw=mmod_imgw,
            num_capsules=model_opt.num_capsules,
            num_regions=model_opt.num_regions,
            dcap=mmod_dcap)
    else:
        # Make NMTModel(= encoder + decoder).
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)
    else:
        print('creating a Sequential generator')
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Fixed: was `word_lut_weight` (no such attribute), which
            # raised AttributeError whenever share_decoder_embeddings
            # was enabled; every other builder ties `word_lut.weight`.
            generator[0].weight = decoder.embeddings.word_lut.weight
    if mmod_generator:
        print('wrapping in a MultiModalGenerator')
        generator = onmt.modules.multimodal.MultiModalGenerator(
            generator, model_opt.img_feat_dim, add=mmod_generator_add,
            use_hidden=mmod_use_hidden)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the base NMT model (encoder + decoder + generator).

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required
    if model_opt.share_embeddings:
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            # Explicit dim: implicit-dim LogSoftmax is deprecated; for the
            # 2D (flattened) generator input this matches the old behavior.
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the base NMT model, supporting partial checkpoint loading: when
    the checkpoint's parameter set does not match the model's, the missing
    parameters are kept from the freshly built model and selected ones are
    re-initialized.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        state = model.state_dict()
        if state.keys() == checkpoint["model"].keys():
            model.load_state_dict(checkpoint["model"])
        else:
            # Partial load: overlay whatever the checkpoint provides on top
            # of the freshly built state, then re-initialize the embedding
            # parameters (e.g. when vocabularies changed between runs).
            state.update(checkpoint["model"])
            model.load_state_dict(state)
            for name, param in model.named_parameters():
                if "embedding" in name:
                    if model_opt.param_init != 0.0:
                        param.data.uniform_(-model_opt.param_init,
                                            model_opt.param_init)
                    if model_opt.param_init_glorot:
                        if param.dim() > 1:
                            # Fixed: was `xavier_uniform(p)` -- `p` is not
                            # defined in this scope (NameError).
                            xavier_uniform(param)
        state = generator.state_dict()
        if state.keys() == checkpoint["generator"].keys():
            generator.load_state_dict(checkpoint["generator"])
        else:
            # Same partial-load strategy for the generator; re-initialize
            # its output projection when it could not be fully restored.
            state.update(checkpoint["generator"])
            generator.load_state_dict(state)
            for name, param in generator.named_parameters():
                if "linear.bias" in name or "linear.weight" in name:
                    if model_opt.param_init != 0.0:
                        param.data.uniform_(-model_opt.param_init,
                                            model_opt.param_init)
                    if model_opt.param_init_glorot:
                        if param.dim() > 1:
                            # Fixed: was `xavier_uniform(p)` (NameError).
                            xavier_uniform(param)
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel (encoder + decoder + generator) from options.

    Supports standard word-level target embeddings or character-level
    composition embeddings (selected via ``model_opt.char_compos_type``).

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        print('Collecting feature vocabularies...')
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        print('Building source embeddings...')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        print('Building an encoder...')
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    if model_opt.char_compos_type == 'none':
        # Plain word-level target embeddings.
        tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                         feature_dicts, for_encoder=False)
        print('Using standard embeddings')
    else:
        # Character-level composition embeddings: one sub-module per
        # composition type listed in the dash-separated option string.
        spells = getVocabSpell(tgt_dict, gpu)
        embedding_dim = model_opt.tgt_word_vec_size
        chanQty = model_opt.char_comp_cnn_chan_qty
        dropout = model_opt.dropout
        numLayers = model_opt.char_comp_rnn_layer
        configs = {}
        for compType in model_opt.char_compos_type.split('-'):
            if compType == 'brnn':
                configs['brnn'] = {'numLayers': numLayers,
                                   'embedSize': embedding_dim}
            elif compType == 'rnn':
                configs['rnn'] = {'numLayers': numLayers,
                                  'embedSize': embedding_dim}
            elif compType == 'cnn':
                configs['cnn'] = {'chanQty': chanQty,
                                  'embedSize': embedding_dim}
            elif compType == 'wembed':
                configs['wembed'] = {'embedSize': embedding_dim}
            else:
                raise Exception('Invalid composition type: ' + compType)
        tgt_embeddings = Char2VecComposite(
            spells, configs,
            charEmbedSize=model_opt.char_embed_size,
            dropout=dropout)
        print('Using char-level composition embeddings of type %s' %
              model_opt.char_compos_type)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            print('Loading pre-trained source vectors: %s/%s' %
                  (model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc))
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        # Char-composition embeddings have no word lookup table to overwrite,
        # so pre-trained target vectors only apply to standard embeddings.
        if hasattr(model.decoder, 'embeddings') \
                and model_opt.char_compos_type == 'none':
            # FIX: this message previously said "source vectors" although it
            # reports the decoder (target) side.
            print('Loading pre-trained target vectors: %s/%s' %
                  (model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec))
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        print('Moving model to CUDA')
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel (encoder + decoder + generator) from options.

    Model and generator states are filled from ``checkpoint`` when given
    (with special handling for an entity-attention module missing from older
    checkpoints), otherwise initialized from ``model_opt``.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            # FIX: explicit dim — the implicit-dim form is deprecated, and
            # the other model constructors in this file pass dim=-1 (for the
            # 2-D (batch, vocab) generator input the result is identical).
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    def _init_model():
        if checkpoint is not None:
            print('Loading model parameters.')
            load_state_dict(model, checkpoint['model'])
            # Older checkpoints may predate the entity-attention module;
            # initialize its parameters by hand in that case.
            if model.encoder.ent_attn.linear_query.weight is not None and \
                    'encoder.ent_attn.linear_query.weight' \
                    not in checkpoint['model']:
                init_val = model_opt.param_init
                model.encoder.ent_attn.linear_query.weight.data.uniform_(
                    -init_val, init_val)
                model.encoder.ent_attn.linear_out.bias.data.uniform_(
                    -init_val, init_val)
                model.encoder.ent_attn.linear_query.bias.data.uniform_(
                    -init_val, init_val)
                model.encoder.ent_attn.linear_context.weight.data.uniform_(
                    -init_val, init_val)
                model.encoder.ent_attn.v.weight.data.uniform_(
                    -init_val, init_val)
                model.encoder.ent_attn.linear_out.weight.data.uniform_(
                    -init_val, init_val)
        else:
            if model_opt.param_init != 0.0:
                print('Initializing model parameters.')
                for p in model.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
            if model_opt.param_init_glorot:
                for p in model.parameters():
                    if p.dim() > 1:
                        xavier_uniform(p)
            if hasattr(model.encoder, 'embeddings'):
                model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
            if hasattr(model.decoder, 'embeddings'):
                model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Load the gen states from checkpoint or initialize them.
    def _fill_generator(gen, name):
        if checkpoint is not None and name in checkpoint:
            print('Loading gen parameters.')
            gen.load_state_dict(checkpoint[name])
        else:
            if model_opt.param_init != 0.0:
                print('Initializing gen parameters.')
                for p in gen.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
            if model_opt.param_init_glorot:
                for p in gen.parameters():
                    if p.dim() > 1:
                        xavier_uniform(p)

    _init_model()
    _fill_generator(generator, 'generator')

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel or HierarchicalModel from options.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel (a HierarchicalModel for ``hierarchical_text``).
    """
    assert model_opt.model_type in ["text", "img", "audio",
                                    "hierarchical_text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "hierarchical_text":
        print("Modelcounstructor line:157 make hierarchical model")
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        # Word-level encoder applied within each sentence.
        sent_encoder = make_encoder(model_opt, src_embeddings)
        # Optional flat word-level encoder over the whole input.
        if model_opt.hier_add_word_enc_input:
            encoder = make_encoder(model_opt, src_embeddings)
        else:
            encoder = None
        # because sub context length is not sorted
        # 18.08.03 to modify method
        # sent_encoder.no_pack_padded_seq = True
        # Sentence-level encoder over the per-sentence representations.
        context_encoder = ContextEncoder(model_opt.rnn_type,
                                         model_opt.brnn,
                                         model_opt.enc_layers,
                                         model_opt.rnn_size,
                                         model_opt.dropout,
                                         model_opt.rnn_size,
                                         model_opt.bridge)
        # because sub context length is not sorted
        # context_encoder.no_pack_padded_seq = True
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    if model_opt.model_type == "hierarchical_text":
        model = HierarchicalModel(context_encoder, sent_encoder, decoder,
                                  normal_encoder=encoder)
    else:
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # for deep summarization
    model.obj_f = model_opt.obj_f
    print("ModelConstructor line:191, model.obj_f")
    print(model.obj_f)
    print("ModelConstructor line:195, tgt vocab len", len(fields["tgt"].vocab))
    print("ModelConstructor line:196, tgt vocab freq len",
          len(fields["tgt"].vocab.freqs))

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        # FIX: the model type was misspelled "hierarhical_text" here, so
        # pre-trained source vectors were never loaded for hierarchical
        # models; the branch silently fell through.
        if model_opt.model_type == "hierarchical_text" and \
                hasattr(model.sent_encoder, 'embeddings'):
            model.sent_encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        elif model_opt.model_type == "text" and \
                hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel, optionally with a second (RNN or GCN) encoder.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel (an NMTModelGCN / NMTModelGCN_DGL variant when the
        primary encoder is a GCN).
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        # Optional second source side with its own vocab/features/embeddings.
        if not model_opt.encoder2_type == 'none':
            src_dict2 = fields["src2"].vocab
            feature_dicts2 = onmt.io.collect_feature_vocabs(fields, 'src2')
            src_embeddings2 = make_embeddings(model_opt, src_dict2,
                                              feature_dicts2)
        if 'morph' in fields and hasattr(fields["morph"], 'vocab'):
            # Morphological embeddings are shared by both encoders.
            morph_dict = fields["morph"].vocab
            morph_embeddings = make_morph_embeddings(model_opt,
                                                     morph_dict, [])
            encoder = make_encoder(model_opt, src_embeddings,
                                   morph_embeddings)
            encoder2 = make_encoder(
                model_opt, src_embeddings2, morph_embeddings,
                encoder_type='rnn'
            ) if not model_opt.encoder2_type == 'none' else None
        else:
            encoder = make_encoder(model_opt, src_embeddings,
                                   encoder_type=model_opt.encoder_type
                                   )  # gcn features must go here
            if model_opt.encoder2_type == 'none':
                encoder2 = None
            else:
                if model_opt.encoder2_type == 'gcn':
                    encoder2 = make_encoder(
                        model_opt, src_embeddings,
                        encoder_type='gcn')  # gcn features must go here
                elif model_opt.encoder2_type == 'rnn':
                    encoder2 = make_encoder(model_opt, src_embeddings2,
                                            encoder_type='rnn')
                else:
                    raise ValueError("Not implemented yet.")
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    # NOTE(review): for "img"/"audio" model types encoder2 is only bound when
    # encoder2_type is 'none'; other combinations would raise a NameError
    # below — presumably unsupported, confirm with callers.
    if model_opt.encoder2_type == 'none':
        encoder2 = None
    if model_opt.encoder_type == 'gcn':
        if model_opt.use_dgl:
            model = NMTModelGCN_DGL(encoder, decoder, encoder2=encoder2)
        else:
            model = NMTModelGCN(encoder, decoder, encoder2=encoder2)
    else:
        model = NMTModel(encoder, decoder, encoder2=encoder2)
    model.model_type = model_opt.model_type  # text

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            # FIX: explicit dim — the implicit-dim form is deprecated, and
            # the other model constructors in this file pass dim=-1 (for the
            # 2-D (batch, vocab) generator input the result is identical).
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        # hasattr(None, 'embeddings') is False, so a missing second encoder
        # is skipped safely.
        if hasattr(model.encoder2, 'embeddings'):
            model.encoder2.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc2, model_opt.fix_word_vecs_enc2)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None, back_model=None):
    """Build an NMTModel with forward and backward (bk) target generators.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        back_model: optional pre-trained backward-model checkpoint (its
                    weight-copying logic is currently disabled, see note
                    below).
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder, with a second embedding table for the backward direction.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)
    bk_tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                        feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight
        bk_tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, bk_tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make forward and backward Generators.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        bk_generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
            bk_generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)
        # FIX: bk_generator was undefined on this branch, raising a
        # NameError below whenever copy_attn was on and share_gen was off;
        # mirror the forward generator.
        bk_generator = CopyGenerator(model_opt.rnn_size,
                                     fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        # bk_generator.load_state_dict(checkpoint['bk_generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            # NOTE(review): bk_generator keeps PyTorch's default init here;
            # confirm whether it should receive the same uniform init.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # NOTE: loading of pre-trained backward-decoder weights from
    # `back_model` was disabled here; it copied decoder.rnn.* parameters
    # of the back model into the matching decoder.bk_rnn.* parameters.

    # Add generators to model (this registers them as parameters of model).
    model.generator = generator
    if model_opt.share_gen:
        model.bk_generator = generator
    else:
        model.bk_generator = bk_generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel with an optional variational-inference module.

    When ``model_opt.use_gmm > 0`` a VariationalInference module (GMM
    latent variable) is constructed and passed into the NMTModel.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # Make variational inference module (only when GMM is enabled).
    model_opt.variable_src_dict = fields["src"].vocab
    if model_opt.use_gmm > 0:
        variationalInference = onmt.Models.VariationalInference(model_opt)
    else:
        variationalInference = None

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder, variationalInference, model_opt)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())  # work for pytorch <= 0.1 version
        # nn.LogSoftmax(dim=-1))  # work for pytorch >=0.3/4 version
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        # FIX: removed leftover debug prints that dumped every parameter
        # tensor to stdout during initialization.
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_vi_model_mmt(model_opt, fields, gpu, checkpoint=None):
    """
    Build the variational-inference multimodal NMT model.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the VI multimodal NMT model.
        - `vi-model1`: a model where there is one global latent variable Z
          used to predict the image features and to inform the decoder
          initialisation.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # infer dimensionality of global image features
    # (1000 for posterior/class-prob features; otherwise by CNN backbone:
    # 4096 for VGG fc features, 2048 for ResNet pool features — inferred
    # here from the feature-file path, TODO confirm against preprocessing)
    if model_opt.use_posterior_image_features:
        feat_size = 1000
    else:
        if 'vgg' in model_opt.path_to_train_img_feats.lower():
            feat_size = 4096
        else:
            feat_size = 2048
    model_opt.global_image_features_dim = feat_size

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        if model_opt.multimodal_model_type in MODEL_TYPES:
            encoder = make_encoder(model_opt, src_embeddings)
        else:
            raise Exception("Multi-modal model type not implemented: %s" %
                            model_opt.multimodal_model_type)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # image features encoder: pick the feature flavour (posterior / local /
    # global) and an optional projector for local (spatial) features.
    if model_opt.multimodal_model_type in MODEL_TYPES:
        if model_opt.use_posterior_image_features:
            image_features_type = "posterior"
            image_features_projector = None
        elif "use_local_image_features" in vars(
                model_opt) and model_opt.use_local_image_features:
            image_features_type = "local"
            image_features_projector = make_encoder_image_local_features(
                model_opt)
        else:
            assert (
                model_opt.use_global_image_features
            ), 'Image features type not recognised. Choose from global, posterior, local.'
            image_features_type = "global"
            image_features_projector = None

        # `vars()` membership check keeps backward compatibility with
        # checkpoints saved before `use_local_image_features` existed.
        if "use_local_image_features" in vars(
                model_opt) and model_opt.use_local_image_features:
            image_feats_dim = model_opt.local_image_features_dim
        else:
            image_feats_dim = model_opt.global_image_features_dim

    if model_opt.multimodal_model_type in MODEL_TYPES:
        word_dropout = model_opt.word_dropout
        decoder = StdRNNVIModel1Decoder(
            model_opt.rnn_type, model_opt.brnn,
            model_opt.dec_layers, model_opt.rnn_size,
            model_opt.global_attention,
            model_opt.coverage_attn,
            model_opt.context_gate,
            model_opt.copy_attn,
            model_opt.dropout,
            word_dropout,
            tgt_embeddings,
            model_opt.z_latent_dim,  # additional dimensionality is z_latent_dim
            model_opt.reuse_copy_attn)
    else:
        raise Exception('Model %s not implemented!' %
                        str(model_opt.multimodal_model_type))

    if model_opt.multimodal_model_type in MODEL_TYPES:
        # if we are using a conditional model, it means we will train the
        # variational approximation q using all observations (x, y, v) and a
        # generative network to predict z from x only.
        if model_opt.conditional:
            if image_features_type == 'local':
                # the reason to use 4 times the RNN is because we concatenate
                # mean src encoding, mean tgt encoding, and the result of an
                # attention between the source and image feats, and between
                # the target and image feats
                input_dims = 4 * model_opt.rnn_size
            else:
                input_dims = 2 * model_opt.rnn_size + \
                    model_opt.global_image_features_dim
            # this inference network uses x_1^m, y_1^n, v
            inf_net_global = GlobalFullInferenceNetwork(
                model_opt.z_latent_dim, input_dims, "normal",
                image_features_type=image_features_type)
            # use x_1^m to predict z
            gen_net_global = GlobalInferenceNetwork(
                model_opt.z_latent_dim, model_opt.rnn_size, "normal")
            # create bidirectional LSTM encoder to encode target sentences
            encoder_tgt = RNNEncoder(model_opt.rnn_type, True,
                                     model_opt.enc_layers,
                                     model_opt.rnn_size,
                                     model_opt.dropout, tgt_embeddings)
            # flow hidden dimension
            flow_h_dim = input_dims
        else:
            # use x_1^m to predict z
            inf_net_global = GlobalInferenceNetwork(
                model_opt.z_latent_dim, model_opt.rnn_size, "normal")
            gen_net_global = None
            # there is no target-language encoder
            encoder_tgt = None
            # flow hidden dimension
            flow_h_dim = model_opt.rnn_size

        # create a separate source-language encoder for the inference network
        encoder_inference = None
        if model_opt.non_shared_inference_network:
            #encoder_inference = make_encoder(model_opt, src_embeddings)
            src_embeddings_inference = make_embeddings(model_opt, src_dict,
                                                       feature_dicts)
            encoder_inference = MeanEncoder(model_opt.enc_layers,
                                            src_embeddings_inference)

        # Two-step prediction: first predict feature activations, then
        # predict pixels from those activations.
        if "two_step_image_prediction" in vars(
                model_opt) and model_opt.two_step_image_prediction:
            if model_opt.use_local_image_features:
                image_feats_dim = model_opt.local_image_features_dim
            else:
                image_feats_dim = model_opt.global_image_features_dim

            if model_opt.use_local_image_features:
                # TODO remove hard-coded parameters into `opts.py`
                n_channels = [500, 1000]
                layer_dims = [3, 5]
                image_size = 7  # predicting feature activations (7x7), not pixels
                inf_net_image_features = ImageDeconvolutionLocalFeatures(
                    input_size=model_opt.z_latent_dim,
                )
                # predict image pixels using output of the image features
                # prediction (inf_net_image_features)
                # TODO remove hard-coded parameters into `opts.py`
                n_channels = [image_feats_dim, image_feats_dim // 4]
                layer_dims = [7, 50]
                image_size = 100
                input_size = [2048, 7, 7]
                inf_net_image_pixels = ImageDeconvolution(
                    input_size=input_size,
                    image_size=image_size,
                    n_channels=n_channels,
                    n_classes=256,
                    apply_log_softmax=True,
                    layer_dims=layer_dims,
                )
            else:
                # using global or posterior image features
                inf_net_image_features = ImageGlobalInferenceNetwork(
                    model_opt.z_latent_dim, image_feats_dim,
                    model_opt.rnn_size, False, "normal")
                # predict image pixels
                # TODO remove hard-coded parameters into `opts.py`
                n_channels = 3 if model_opt.use_rgb_images else 1
                n_channels = [n_channels] * 2
                layer_dims = [25, 50]
                image_size = 100
                inf_net_image_pixels = ImageDeconvolution(
                    model_opt.z_latent_dim,
                    image_size=image_size,
                    n_channels=n_channels)
            # we are predicting both image features (with image_loss ==
            # 'logprob') and image pixels (with image_loss == 'categorical')
            inf_net_image = (inf_net_image_features, inf_net_image_pixels)
        else:
            # we are only predicting either image features (image_loss !=
            # 'categorical') or image pixels (image_loss == 'categorical')
            if model_opt.image_loss != 'categorical':
                print("Creating image inference network")
                if model_opt.use_global_image_features or \
                        model_opt.use_posterior_image_features:
                    inf_net_image = ImageGlobalInferenceNetwork(
                        model_opt.z_latent_dim,
                        model_opt.global_image_features_dim,
                        model_opt.rnn_size, False, "normal")
                elif model_opt.use_local_image_features:
                    # TODO remove hard-coded parameters into `opts.py`
                    n_channels = [500, 1000]
                    layer_dims = [3, 5]
                    image_size = 7  # predicting feature activations (7x7), not pixels
                    inf_net_image = ImageDeconvolutionLocalFeatures(
                        input_size=model_opt.z_latent_dim,
                    )
                else:
                    raise Exception("Image features type not recognised.")
                print(inf_net_image)
            else:
                # TODO remove hard-coded parameters into `opts.py`
                n_channels = 3 if model_opt.use_rgb_images else 1
                n_channels = [n_channels] * 2
                image_size = 100
                inf_net_image = ImageDeconvolution(model_opt.z_latent_dim,
                                                   image_size=image_size,
                                                   n_channels=n_channels)
    else:
        raise Exception('Model %s not implemented!' %
                        str(model_opt.multimodal_model_type))

    # Make NMTModel(= encoder + decoder + inference networks).
    model = NMTVIModel(
        encoder, decoder,
        encoder_inference=encoder_inference,
        inf_net_global=inf_net_global,
        gen_net_global=gen_net_global,
        inf_net_image=inf_net_image,
        multimodal_model_type='vi-model1',
        image_loss_type=model_opt.image_loss,
        image_features_type=image_features_type,
        image_features_projector=image_features_projector,
        two_step_image_prediction=model_opt.two_step_image_prediction
        if "two_step_image_prediction" in vars(model_opt) else False,
        conditional=model_opt.conditional,
        encoder_tgt=encoder_tgt)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None, train_part="all"):
    """
    Build an NMTModel with an additional document-context component.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        train_part: "all" trains everything; "context" loads the main model
                    from `checkpoint`, freezes it, and trains only the
                    document-context parameters.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)
    context = make_context(model_opt, tgt_dict)

    # Make NMTModel(= encoder + decoder + context).
    if model_opt.RISK_ratio > 0.0:
        # RISK training decodes during training, so the model needs the full
        # beam-search configuration and a global scorer.
        scorer = onmt.translate.GNMTGlobalScorer(model_opt.alpha,
                                                 model_opt.beta,
                                                 model_opt.coverage_penalty,
                                                 model_opt.length_penalty)
        model = NMTModel(
            encoder, decoder, context,
            context_type=model_opt.context_type,
            tgt_vocab=fields['tgt'].vocab,
            beam_size=model_opt.beam_size,
            n_best=model_opt.n_best,
            gpu=gpu,
            scorer=scorer,
            min_length=model_opt.min_length,
            max_length=model_opt.max_length,
            stepwise_penalty=model_opt.stepwise_penalty,
            block_ngram_repeat=model_opt.block_ngram_repeat,
            ignore_when_blocking=model_opt.ignore_when_blocking,
            copy_attn=model_opt.copy_attn,
            context_size=model_opt.context_size)
    else:
        model = NMTModel(encoder, decoder, context,
                         context_type=model_opt.context_type)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model_dict = checkpoint['model']
        if train_part == "context":
            # Start from the freshly built state dict and overlay the
            # checkpoint's matching (non-context) entries.
            model_dict = model.state_dict()
            if 'join' in model_opt.context_type:
                # Joint context: remap legacy 'doc_context' keys onto the
                # first sub-module slot 'doc_context.0'.
                pretrained_dict = {}
                for k, v in checkpoint['model'].items():
                    if k in model_dict:
                        if 'doc_context' in k:
                            k = k.replace('doc_context', 'doc_context.0')
                        pretrained_dict[k] = v
            else:
                pretrained_dict = {
                    k: v
                    for k, v in checkpoint['model'].items()
                    if k in model_dict and 'doc_context' not in k
                }
            model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict, strict=False)
        generator.load_state_dict(checkpoint['generator'])
        if train_part == "context":
            # BUG FIX: the attribute is `requires_grad`; the original code
            # wrote `require_grad`, which just created an unused attribute
            # and left every parameter trainable (and the context frozen).
            print("Freezing parameters of main model")
            for param in model.parameters():
                param.requires_grad = False
            for param in generator.parameters():
                param.requires_grad = False
            print("Unfreezing parameters of context")
            for param in model.doc_context.parameters():
                param.requires_grad = True
                if model_opt.param_init != 0.0:
                    param.data.uniform_(-model_opt.param_init,
                                        model_opt.param_init)
                if model_opt.param_init_glorot:
                    if param.dim() > 1:
                        xavier_uniform(param)
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build an NMT model, optionally wrapped in a multimodal model class.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel (or a multimodal subclass when `-multimodal_type` is set).
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make Generator.
    use_multimodal_model = model_opt.multimodal_type is not None
    # 'gm' multimodal variants concatenate the second (e.g. image) encoding
    # onto the decoder output, so the generator input widens accordingly.
    if use_multimodal_model and 'gm' in model_opt.multimodal_type:
        generator_in_size = model_opt.rnn_size + model_opt.second_dim
    else:
        generator_in_size = model_opt.rnn_size
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(generator_in_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(generator_in_size, fields["tgt"].vocab)

    if use_multimodal_model:
        second_dim_in = model_opt.second_dim_in
        second_dim = model_opt.second_dim
        # Look up the concrete multimodal model class by its key.
        mmm_class = onmt.modules.MultiModalModel.multimodal_model_class_by_key(
            model_opt.multimodal_type)
        model = mmm_class(
            encoder=encoder,
            second_encoder=nn.Sequential(nn.Linear(second_dim_in, second_dim),
                                         nn.Sigmoid()),
            second_dim=second_dim,
            decoder=decoder,
            generator=None  # Assigned later
        )
    else:
        # Make NMTModel(= encoder + decoder).
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build an NMT model; supports trigram source encoders and character-level
    target decoders (which come as a pair of decoders).

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # Make NMTModel(= encoder + decoder). For `charrnn` decoders,
    # make_decoder returns a pair of decoders.
    if model_opt.encoder_type == "trigramrnn" and \
            model_opt.decoder_type == "rnn":
        decoder = make_decoder(model_opt, tgt_embeddings)
        model = NMTSourceTrigramModel(encoder, decoder)
    elif model_opt.encoder_type == "brnn" and \
            model_opt.decoder_type == "charrnn":
        [decoder1, decoder2] = make_decoder(model_opt, tgt_embeddings)
        model = NMTTargetCharModel(encoder, decoder1, decoder2)
    elif model_opt.encoder_type == "trigramrnn" and \
            model_opt.decoder_type == "charrnn":
        [decoder1, decoder2] = make_decoder(model_opt, tgt_embeddings)
        model = CharNMTModel(encoder, decoder1, decoder2)
    else:
        decoder = make_decoder(model_opt, tgt_embeddings)
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            # NOTE(review): in the `charrnn` branches above only
            # `decoder1`/`decoder2` are bound, so `decoder` here would raise
            # NameError if -share_decoder_embeddings is combined with a
            # char decoder — confirm intended option combinations.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.pre_encoder:
            # Overlay pretrained encoder weights from a separate checkpoint
            # and freeze the encoder.
            pretrained = torch.load(model_opt.pre_encoder)
            encoder_dict = {}
            for key in pretrained['model']:
                if key.startswith('encoder'):
                    encoder_dict[key] = pretrained['model'][key]
            model_dict = model.state_dict()
            model_dict.update(encoder_dict)
            model.load_state_dict(model_dict)
            model.encoder.requires_grad = False
            for p in model.encoder.parameters():
                p.requires_grad = False
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if model_opt.decoder_type == "charrnn":
            # Char decoders keep their embeddings on the first decoder.
            if hasattr(model.decoder1, 'embeddings'):
                model.decoder1.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
        else:
            if hasattr(model.decoder, 'embeddings'):
                model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None,
                    init_encoder=False, rev_checkpoint=None, top_layer=100):
    """
    Build an NMT model, optionally initializing (and freezing) only the
    encoder layers up to `top_layer` from `checkpoint`.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        init_encoder: when True, initialize the rest of the model randomly
                      and copy/freeze only encoder parameters from
                      `checkpoint` (layers <= `top_layer`).
        rev_checkpoint: currently unused; kept for interface compatibility.
        top_layer: highest encoder layer index to copy when `init_encoder`.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    if model_opt.share_rnn:
        if model_opt.input_feed == 1:
            raise AssertionError('Cannot share encoder and decoder weights'
                                 'when using input feed in decoder')
        # BUG FIX: the original compared src_word_vec_size with itself
        # (always False), so the size guard never fired. Compare the source
        # embedding size with the target embedding size instead.
        if model_opt.src_word_vec_size != model_opt.tgt_word_vec_size:
            raise AssertionError('Cannot share encoder and decoder weights'
                                 'if embeddings are different sizes')
        encoder.rnn = decoder.rnn

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None and not init_encoder:
        print('Loading model parameters from checkpoint.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    if init_encoder:
        # Select the encoder keys to copy: skip layers above `top_layer`
        # (layer index is read from the parameter-name suffix; embeddings
        # are always copied).
        model_dict = checkpoint['model']
        model_dict_keys = []
        for key in model_dict.keys():
            if key[:7] == 'encoder':
                if key[-7:] == 'reverse':
                    if int(key[-9]) > top_layer:
                        continue
                else:
                    if key[8:18] != 'embeddings' and int(
                            key[-1]) > top_layer:
                        continue
                model_dict_keys.append(key)
        print(model_dict_keys)
        # Load encoder parameters
        new_model_dict = model.state_dict()
        for key, value in model_dict.items():
            if key in model_dict_keys:
                new_model_dict[key] = value
        model.load_state_dict(new_model_dict)
        # Freeze encoder parameters
        for name, param in model.named_parameters():
            if name in model_dict_keys:
                param.requires_grad = False

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model