def make_G_model(model_opt, use_cuda=True, checkpoint=None):
    """Assemble the generator-side NMT model (encoder + decoder + generator).

    Args:
        model_opt: options carrying vocab sizes, embedding/rnn dimensions
            and parameter-initialization settings.
        use_cuda: move the finished model to GPU when True.
        checkpoint: optional dict with 'model'/'generator' state dicts to
            restore; when None, parameters are freshly initialized.

    Returns:
        The assembled NMTModel with its generator attached.
    """
    # Continuous embeddings for both sides, sharing the PAD index.
    enc_emb = continuousEmbedding(model_opt.src_vocab_size,
                                  model_opt.embedding_dim, data.PAD_idx)
    dec_emb = continuousEmbedding(model_opt.tgt_vocab_size,
                                  model_opt.embedding_dim, data.PAD_idx)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(onmt.ModelConstructor.make_encoder(model_opt, enc_emb),
                     onmt.ModelConstructor.make_decoder(model_opt, dec_emb))
    model.model_type = "text"

    # Generator: decoder hidden state -> log-probs over the target vocab.
    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, model_opt.tgt_vocab_size),
        nn.LogSoftmax(dim=-1))

    if checkpoint is not None:
        # Restore both sub-networks from a saved snapshot.
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing G model parameters.')
            bound = model_opt.param_init
            for module in (model, generator):
                for param in module.parameters():
                    param.data.uniform_(-bound, bound)
        if model_opt.param_init_glorot:
            # Glorot/Xavier init only makes sense for matrices (dim > 1).
            for module in (model, generator):
                for param in module.parameters():
                    if param.dim() > 1:
                        xavier_uniform(param)

    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if use_cuda:
        model.cuda()
    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the text NMT model (encoder + decoder + generator).

    Args:
        model_opt: the option namespace (from CLI or a checkpoint).
        fields: `Field` objects providing src/tgt vocabularies.
        gpu (bool): whether to place the model on GPU.
        checkpoint: optional snapshot dict with 'model'/'generator'
            state dicts; when None, parameters are freshly initialized.

    Returns:
        The assembled NMTModel with its generator attached.
    """
    # Make encoder.
    src_dict = fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        # FIX: pass dim explicitly — implicit-dim LogSoftmax is deprecated
        # and picks a shape-dependent axis; the generator consumes
        # (N, rnn_size) rows, so normalize over the vocab axis.
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')  # FIX: typo "Intializing"
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init only applies to weight matrices (dim > 1).
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()
    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMT model for text, image or audio input.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.

    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # FIX: img/audio encoders have no `src_embeddings`, so sharing would
        # previously crash with a NameError; fail with a clear message.
        if model_opt.model_type != "text":
            raise AssertionError('share_embeddings requires a text encoder '
                                 'with source word embeddings.')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        # FIX: explicit dim — implicit-dim LogSoftmax is deprecated and
        # shape-dependent; normalize over the vocabulary axis.
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')  # FIX: typo "Intializing"
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()
    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMT model, optionally with a variational inference network.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.

    Returns:
        the NMTModel (a ViNMTModel when an inference network is configured).
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        src_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         src_feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    tgt_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, tgt_feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make inference network (only when a type other than "none" is set).
    inference_network = make_inference_network(
        model_opt, src_embeddings, tgt_embeddings,
        src_dict, src_feature_dicts, tgt_dict, tgt_feature_dicts
    ) if model_opt.inference_network_type != "none" else None

    if model_opt.prior_normalization == "bnshare":
        # Share batch-norm statistics between the prior attention and the
        # inference network. NOTE(review): assumes an inference network is
        # configured when "bnshare" is requested — crashes otherwise.
        decoder.attn.bn_mu = inference_network.bn_mu
        decoder.attn.bn_std = inference_network.bn_std

    # Make NMTModel(= encoder + decoder + inference network).
    model = (NMTModel(encoder, decoder, None, "none")
             if inference_network is None
             else ViNMTModel(encoder, decoder, inference_network,
                             dist_type=model_opt.dist_type,
                             use_prior=model_opt.use_prior > 0))
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        # strict=False: checkpoints from the plain NMT model lack the
        # inference-network parameters, which are kept freshly initialized.
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')  # FIX: typo "Intializing"
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init only applies to weight matrices (dim > 1).
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()
    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMT model for text or image input.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.

    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        source_vocab = fields["src"].vocab
        source_features = ONMTDataset.collect_feature_dicts(fields)
        source_embeddings = make_embeddings(model_opt, source_vocab,
                                            source_features)
        encoder = make_encoder(model_opt, source_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)

    # Make decoder. Target-side features are not supported yet.
    target_vocab = fields["tgt"].vocab
    target_embeddings = make_embeddings(model_opt, target_vocab, [],
                                        for_encoder=False)
    decoder = make_decoder(model_opt, target_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)

    # Make Generator. Note: the plain generator emits raw scores; the
    # (log)softmax modules are attached to the model separately below.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)
    else:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight

    # Load the model states from checkpoint or initialize them.
    if checkpoint is None:
        if model_opt.param_init != 0.0:
            print('Intializing parameters.')
            bound = model_opt.param_init
            for p in model.parameters():
                p.data.uniform_(-bound, bound)
        model.encoder.embeddings.load_pretrained_vectors(
            model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        model.decoder.embeddings.load_pretrained_vectors(
            model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
    else:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Attach generator and normalization modules; assigning nn.Modules to
    # attributes registers their parameters with the model.
    model.generator = generator
    model.logsoftmax = nn.LogSoftmax()
    model.softmax = nn.Softmax()

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()
    return model
def make_audio_text_model_from_text(model_opt, fields, text_fields, gpu,
                                    checkpoint=None):
    """Build a (base, text, speech) model triple that shares a decoder.

    Args:
        model_opt: the option namespace.
        fields: `Field` objects for the audio model.
        text_fields: `Field` objects for the text model.
        gpu (bool): whether to place the text/speech models on GPU.
        checkpoint: optional snapshot dict; when present, restores the
            generator, text model and (if saved) speech model.

    Returns:
        (model, text_model, speech_model) tuple.
    """
    model = make_base_model(model_opt, fields, gpu, None)

    # Text-side encoder built over the text fields' source vocabulary.
    src_dict = text_fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(text_fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    text_encoder = make_encoder(model_opt, src_embeddings)

    generator = model.generator
    # FIX: only restore the generator when a checkpoint was actually given;
    # previously this crashed with TypeError for the default checkpoint=None.
    if checkpoint is not None:
        generator.load_state_dict(checkpoint['generator'])

    # Text model reuses the base model's decoder.
    text_model = NMTModel(text_encoder, model.decoder)
    text_model.model_type = 'text'
    text_model.decoder.set_generator(None)

    # FIX: narrowed the bare `except:` (which also swallowed
    # KeyboardInterrupt/SystemExit); older option files lack
    # `conv_global_encoder`, so fall back to the plain global encoder.
    try:
        if model_opt.conv_global_encoder:
            global_speech_encoder = ConvGlobalAudioEncoder(
                model_opt.enc_layers, model_opt.brnn, model_opt.rnn_size,
                model_opt.dropout, model_opt.sample_rate,
                model_opt.window_size)
        else:
            global_speech_encoder = GlobalAudioEncoder(
                model_opt.enc_layers, model_opt.brnn, model_opt.rnn_size,
                model_opt.dropout, model_opt.sample_rate,
                model_opt.window_size)
    except Exception:
        global_speech_encoder = GlobalAudioEncoder(
            model_opt.enc_layers, model_opt.brnn, model_opt.rnn_size,
            model_opt.dropout, model_opt.sample_rate, model_opt.window_size)

    # FIX: Python 2 `print` statement converted to the print() function.
    print("ff:", model_opt.ff_speech_decoder)
    if model_opt.ff_speech_decoder:
        speech_decoder = FFAudioDecoder(model_opt.rnn_size * 3,
                                        model_opt.rnn_size,
                                        model_opt.dec_layers)
    else:
        speech_decoder = AudioDecoder(model_opt.rnn_type, model_opt.brnn,
                                      model_opt.dec_layers,
                                      model_opt.rnn_size,
                                      model_opt.global_attention,
                                      model_opt.coverage_attn,
                                      model_opt.context_gate,
                                      model_opt.copy_attn,
                                      model_opt.dropout)

    speech_model = SpeechModel(model.encoder, global_speech_encoder,
                               speech_decoder)

    if model_opt.param_init != 0.0:
        print('Initializing model parameters.')  # FIX: typo "Intializing"
        for p in text_model.parameters():
            p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        for p in speech_model.parameters():
            p.data.uniform_(-model_opt.param_init, model_opt.param_init)

    if hasattr(text_model.encoder, 'embeddings'):
        text_model.encoder.embeddings.load_pretrained_vectors(
            model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        text_model.load_state_dict(checkpoint['model'])
        # FIX: dict.has_key() was removed in Python 3 — use `in`.
        if 'speech_model' in checkpoint and \
                checkpoint['speech_model'] is not None:
            print(' Loading speech model parameters')
            speech_model.load_state_dict(checkpoint['speech_model'])

    # Add generator to model (this registers it as parameter of model).
    text_model.decoder.set_generator(generator)
    text_model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        text_model.cuda()
        speech_model.cuda()
    else:
        text_model.cpu()
        speech_model.cpu()
    return model, text_model, speech_model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMT model, optionally with a double (src + intermediate) encoder.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu: Boolean: whether to use gpu.
        checkpoint: the snapshot model.

    Returns:
        the NMTModel (a DoubleEncNMTModel for the double-encoder type).
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        if model_opt.encoder_type == "double_encoder":
            # Second encoder consumes the intermediate ('inter') sequence.
            inter_dict = fields['inter'].vocab
            inter_feature_dicts = ONMTDataset.collect_feature_dicts(fields)
            inter_embeddings = make_embeddings(model_opt, inter_dict,
                                               inter_feature_dicts,
                                               for_encoder=False,
                                               for_encoder_int=True)
            encoder = make_encoder(model_opt, src_embeddings,
                                   inter_embeddings)
        else:
            encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = []
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder_type == "double_encoder":
        model = DoubleEncNMTModel(encoder, decoder)
    else:
        model = NMTModel(encoder, decoder)

    # Make Generator.
    if not model_opt.copy_attn:
        # FIX: explicit dim — implicit-dim LogSoftmax is deprecated and
        # shape-dependent; normalize over the vocabulary axis.
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint.
    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Add generator to model (this registers its parameters on the model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()
    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None, stage1=True):
    """Build an NMT model for one of two training stages.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.
        stage1: selects the src1/tgt1 fields (stage 1) or src2/tgt2
            (stage 2, which uses the copy generator).

    Returns:
        the NMTModel.
    """
    # Field names depend on the training stage.
    if stage1:
        src = "src1"
        tgt = "tgt1"
    else:
        src = "src2"
        tgt = "tgt2"

    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields[src].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, src)
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        # Table embeddings drop the word lookup and reuse the feature
        # lookups (field/type/ha) from the source embeddings below.
        table_embeddings = make_embeddings(model_opt, src_dict,
                                           feature_dicts, discard_word=True)
        print(table_embeddings.make_embedding[0])
        # Reuse the same embedding weights between src and table views.
        table_embeddings.word_lut.weight = src_embeddings.word_lut.weight
        table_embeddings.field_lut.weight = src_embeddings.field_lut.weight
        table_embeddings.type_lut.weight = src_embeddings.type_lut.weight
        table_embeddings.ha_lut.weight = src_embeddings.ha_lut.weight
        encoder = make_encoder(model_opt,
                               (src_embeddings, table_embeddings), stage1)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields[tgt].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, tgt)
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    # NOTE(review): assumes a text encoder (src_dict/src_embeddings exist)
    # when share_embeddings is set — confirm img/audio never set it.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, stage1)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator: plain softmax generator for stage 1, copy generator
    # for stage 2.
    if stage1:
        # FIX: explicit dim — implicit-dim LogSoftmax is deprecated and
        # shape-dependent; normalize over the vocabulary axis.
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt1"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt2"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')  # FIX: typo "Intializing"
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()
    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMT model for text, hierarchical text, image or audio input.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.

    Returns:
        the NMTModel (a HierarchicalModel for "hierarchical_text").
    """
    assert model_opt.model_type in ["text", "img", "audio",
                                    "hierarchical_text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "hierarchical_text":
        print("Modelcounstructor line:157 make hierarchical model")
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        sent_encoder = make_encoder(model_opt, src_embeddings)
        # Optional word-level encoder fed alongside the hierarchy.
        if model_opt.hier_add_word_enc_input:
            encoder = make_encoder(model_opt, src_embeddings)
        else:
            encoder = None
        # Sentence-level encoder over the sentence representations.
        # (pack_padded_seq is left enabled; sub-context lengths are
        # handled elsewhere.)
        context_encoder = ContextEncoder(model_opt.rnn_type, model_opt.brnn,
                                         model_opt.enc_layers,
                                         model_opt.rnn_size,
                                         model_opt.dropout,
                                         model_opt.rnn_size,
                                         model_opt.bridge)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    if model_opt.model_type == "hierarchical_text":
        model = HierarchicalModel(context_encoder, sent_encoder, decoder,
                                  normal_encoder=encoder)
    else:
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Objective function selection for deep summarization.
    model.obj_f = model_opt.obj_f
    print("ModelConstructor line:191, model.obj_f")
    print(model.obj_f)
    print("ModelConstructor line:195, tgt vocab len",
          len(fields["tgt"].vocab))
    print("ModelConstructor line:196, tgt vocab freq len",
          len(fields["tgt"].vocab.freqs))

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')  # FIX: typo "Intializing"
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init only applies to weight matrices (dim > 1).
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        # FIX: the model-type string was misspelled "hierarhical_text",
        # so pretrained encoder vectors were silently never loaded for
        # hierarchical models.
        if model_opt.model_type == "hierarchical_text" and \
                hasattr(model.sent_encoder, 'embeddings'):
            model.sent_encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        elif model_opt.model_type == "text" and \
                hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()
    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMT model with an optional second encoder (rnn or gcn).

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.

    Returns:
        the NMTModel (an NMTModelGCN/NMTModelGCN_DGL for gcn encoders).
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)

        # Second source side (only when a secondary encoder is requested).
        if not model_opt.encoder2_type == 'none':
            src_dict2 = fields["src2"].vocab
            feature_dicts2 = onmt.io.collect_feature_vocabs(fields, 'src2')
            src_embeddings2 = make_embeddings(model_opt, src_dict2,
                                              feature_dicts2)

        if 'morph' in fields and hasattr(fields["morph"], 'vocab'):
            # Morphological embeddings are fed to both encoders.
            morph_dict = fields["morph"].vocab
            morph_embeddings = make_morph_embeddings(model_opt, morph_dict,
                                                     [])
            encoder = make_encoder(model_opt, src_embeddings,
                                   morph_embeddings)
            encoder2 = make_encoder(
                model_opt, src_embeddings2, morph_embeddings,
                encoder_type='rnn'
            ) if not model_opt.encoder2_type == 'none' else None
        else:
            encoder = make_encoder(model_opt, src_embeddings,
                                   encoder_type=model_opt.encoder_type)
            if model_opt.encoder2_type == 'none':
                encoder2 = None
            else:
                if model_opt.encoder2_type == 'gcn':
                    encoder2 = make_encoder(model_opt, src_embeddings,
                                            encoder_type='gcn')
                elif model_opt.encoder2_type == 'rnn':
                    encoder2 = make_encoder(model_opt, src_embeddings2,
                                            encoder_type='rnn')
                else:
                    raise ValueError("Not implemented yet.")
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    # NOTE(review): encoder2 is only defined on the text path; img/audio
    # with encoder2_type != 'none' would hit a NameError — confirm those
    # combinations are never configured.
    if model_opt.encoder2_type == 'none':
        encoder2 = None
    if model_opt.encoder_type == 'gcn':
        if model_opt.use_dgl:
            model = NMTModelGCN_DGL(encoder, decoder, encoder2=encoder2)
        else:
            model = NMTModelGCN(encoder, decoder, encoder2=encoder2)
    else:
        model = NMTModel(encoder, decoder, encoder2=encoder2)
    model.model_type = model_opt.model_type  # text

    # Make Generator.
    if not model_opt.copy_attn:
        # FIX: explicit dim — implicit-dim LogSoftmax is deprecated and
        # shape-dependent; normalize over the vocabulary axis.
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')  # FIX: typo "Intializing"
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init only applies to weight matrices (dim > 1).
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.encoder2, 'embeddings'):
            model.encoder2.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc2, model_opt.fix_word_vecs_enc2)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()
    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the complete model: encoder + decoder + generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.

    Returns:
        the NMTModel (a TwoEncoderModel for the two-encoder encoder types).
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    src_dict = fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict,
                                     feature_dicts, for_encoder=True)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # BUGFIX: honor the `gpu` flag; previously this unconditionally required
    # CUDA even when the caller asked for a CPU model.
    device = torch.device("cuda" if gpu else "cpu")
    all_docs = load_all_docs(model_opt, fields, device)

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder_type in ('BiAttEncoder', 'transformer'):
        model = TwoEncoderModel(encoder, decoder, all_docs, src_embeddings)
    else:
        # BUGFIX: only "PostEncoder" was handled here before, leaving `model`
        # unbound (NameError) for any other encoder type; fall back to the
        # plain encoder-decoder model instead.
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init only makes sense for matrices, not bias vectors.
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None, back_model=None):
    """Build the model plus a backward decoder path (forward + backward generators).

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.
        back_model: optional pretrained backward-model checkpoint
            (currently unused; see note near the end).

    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.  Two target embedding tables: one for the forward decoder
    # RNN and one for the backward decoder RNN.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)
    bk_tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                        feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight
        bk_tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, bk_tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator (forward) and the backward generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        bk_generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
            bk_generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)
        # BUGFIX: bk_generator was undefined on this branch, which raised
        # NameError below at `model.bk_generator = bk_generator` whenever
        # copy_attn was set without share_gen.
        bk_generator = CopyGenerator(model_opt.rnn_size,
                                     fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        # bk_generator.load_state_dict(checkpoint['bk_generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            # BUGFIX: initialize the backward generator the same way; it was
            # previously left with default (framework) initialization.
            for p in bk_generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # NOTE(review): an earlier (commented-out) version selectively copied
    # `decoder.rnn.*` weights from `back_model` into `decoder.bk_rnn.*`;
    # `back_model` is accepted but intentionally unused for now.

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    if model_opt.share_gen:
        model.bk_generator = generator
    else:
        model.bk_generator = bk_generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the model, optionally with a variational-inference (GMM) module.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.

    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # Make variational inference module (GMM-based); disabled when use_gmm<=0.
    # VariationalInference reads all of its sizes from model_opt, including
    # the src vocab stashed on it here.
    model_opt.variable_src_dict = fields["src"].vocab
    if model_opt.use_gmm > 0:
        variationalInference = onmt.Models.VariationalInference(model_opt)
    else:
        variationalInference = None

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder, variationalInference, model_opt)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())  # work for pytorch <= 0.1 version
            # nn.LogSoftmax(dim=-1))  # work for pytorch >=0.3/4 version
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        # BUGFIX: removed leftover debug prints that dumped every full
        # parameter tensor to stdout during initialization.
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the (optionally multimodal) model: encoder + decoder + generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.

    Returns:
        the NMTModel (or MultiModalNMTModel for multimodal variants).
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Multimodal flags live in `multimodal_model_type`; older checkpoints may
    # not define these options at all, hence the AttributeError fallbacks.
    try:
        mmod_generator = 'generator' in model_opt.multimodal_model_type
        mmod_bank = 'bank' in model_opt.multimodal_model_type
        mmod_imgw = 'imgw' in model_opt.multimodal_model_type
        mmod_dcap = 'dcap' in model_opt.multimodal_model_type
        mmod_model = mmod_bank or mmod_imgw or mmod_dcap
    except AttributeError:
        mmod_generator = False
        mmod_bank = False
        mmod_imgw = False
        mmod_model = False
        mmod_dcap = False
    try:
        mmod_generator_add = model_opt.mmod_generator_add
    except AttributeError:
        mmod_generator_add = 0.0
    try:
        mmod_use_hidden = model_opt.mmod_use_hidden
    except AttributeError:
        mmod_use_hidden = False

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings, mmod_imgw=mmod_imgw)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(
        model_opt, tgt_dict, feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, mmod_dcap)

    # Make Model
    if mmod_model:
        if mmod_bank:
            bridge = multimodal.MultiModalMemoryBankGate(
                model_opt.rnn_size, model_opt.img_feat_dim,
                add=mmod_generator_add)
        else:
            bridge = None
        model = multimodal.MultiModalNMTModel(
            encoder, bridge, decoder,
            imgw=mmod_imgw,
            num_capsules=model_opt.num_capsules,
            num_regions=model_opt.num_regions,
            dcap=mmod_dcap)
    else:
        # Make NMTModel(= encoder + decoder).
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)
    else:
        print('creating a Sequential generator')
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # BUGFIX: was `word_lut_weight` (AttributeError); the embedding
            # matrix is `word_lut.weight`, as in the share_embeddings tie above.
            generator[0].weight = decoder.embeddings.word_lut.weight
    if mmod_generator:
        print('wrapping in a MultiModalGenerator')
        generator = onmt.modules.multimodal.MultiModalGenerator(
            generator, model_opt.img_feat_dim,
            add=mmod_generator_add, use_hidden=mmod_use_hidden)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None,
                    init_encoder=False, rev_checkpoint=None, top_layer=100):
    """Build the model, optionally bootstrapping and freezing the encoder
    from a pretrained checkpoint.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.
        init_encoder (bool): if True, only load (and freeze) encoder
            parameters from `checkpoint` instead of the full model.
        rev_checkpoint: reserved for loading reverse-direction encoder
            weights (currently unused).
        top_layer (int): highest encoder layer index to copy/freeze.

    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Optionally share the RNN weights between encoder and decoder.
    if model_opt.share_rnn:
        if model_opt.input_feed == 1:
            raise AssertionError('Cannot share encoder and decoder weights '
                                 'when using input feed in decoder')
        # BUGFIX: the original compared src_word_vec_size to itself (always
        # False); the intent is to require equal src/tgt embedding sizes.
        if model_opt.src_word_vec_size != model_opt.tgt_word_vec_size:
            raise AssertionError('Cannot share encoder and decoder weights '
                                 'if embeddings are different sizes')
        encoder.rnn = decoder.rnn

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None and not init_encoder:
        print('Loading model parameters from checkpoint.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    if init_encoder:
        # Select which encoder parameter keys to copy from the checkpoint:
        # layers above `top_layer` are skipped.  NOTE(review): the layer index
        # is read from fixed character positions in the key name (key[-9] for
        # '*_reverse' keys, key[-1] otherwise) — this assumes single-digit
        # layer indices; confirm against the checkpoint's key naming.
        model_dict = checkpoint['model']
        model_dict_keys = []
        for key in model_dict.keys():
            if key[:7] == 'encoder':
                if key[-7:] == 'reverse':
                    if int(key[-9]) > top_layer:
                        continue
                else:
                    if key[8:18] != 'embeddings' and int(
                            key[-1]) > top_layer:
                        continue
                model_dict_keys.append(key)
        print(model_dict_keys)

        # Load encoder parameters into a copy of the freshly built state.
        new_model_dict = model.state_dict()
        for key, value in model_dict.items():
            if key in model_dict_keys:
                new_model_dict[key] = value
        # NOTE(review): a disabled variant also copied '*_reverse' weights
        # from `rev_checkpoint`; `rev_checkpoint` is accepted but unused.
        model.load_state_dict(new_model_dict)

        # Freeze the copied encoder parameters.
        for name, param in model.named_parameters():
            if name in model_dict_keys:
                param.requires_grad = False

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def run_encoder_to_decoder_state(self, src, second_src, lengths):
    """Delegate to the base NMTModel single-source path.

    NOTE(review): `second_src` is accepted for interface compatibility but
    is not forwarded — presumably the base-class path handles only one
    source; confirm with callers.
    """
    base_path = NMTModel.run_encoder_to_decoder_state
    return base_path(self, src, lengths)
def make_base_model(model_opt, fields, gpu, checkpoint=None, spelling=None,
                    tgt_char_field=None):
    """Build the model; target embeddings/generator may be character-composed.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.
        spelling: character spellings of target words, consumed by
            WordRepresenter when char composition is enabled.
        tgt_char_field: `Field` over target characters (char vocab source).

    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')

    # Consolidated: the original tested `use_char_composition == 'None'`
    # twice in a row; one if/else covers both the word representer and the
    # target embeddings.
    if model_opt.use_char_composition == 'None':
        word_representer = None
        tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                         feature_dicts, for_encoder=False)
    else:
        word_representer = WordRepresenter(
            spelling,
            len(tgt_char_field.vocab),
            tgt_char_field.vocab.stoi[onmt.io.PAD_WORD],
            model_opt.tgt_word_vec_size,
            char_composition=model_opt.use_char_composition)
        if gpu:
            word_representer.init_cuda()
        tgt_embeddings = VarEmbedding(word_representer,
                                      model_opt.tgt_word_vec_size)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        if model_opt.use_char_composition == 'None':
            generator = nn.Sequential(
                nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
                nn.LogSoftmax(dim=-1))
        else:
            # Char-composed output layer shares the word representer.
            generator = nn.Sequential(
                VarLinear(word_representer),
                nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # NOTE(review): with char composition, generator[0] is a
            # VarLinear — confirm it exposes `.weight` before combining
            # share_decoder_embeddings with use_char_composition.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        # Pretrained target vectors only apply to a plain word lookup table.
        if hasattr(model.decoder, 'embeddings') \
                and model_opt.use_char_composition == 'None':
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the (possibly reversible) model: encoder + decoder, no generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.

    Returns:
        the NMTModel (RevNMTModel when both encoder and decoder are 'Rev').
    """
    # Encoder side: embeddings built from the source vocab and its features.
    source_vocab = fields["src"].vocab
    source_feats = onmt.io.collect_feature_vocabs(fields, 'src')
    source_emb = make_embeddings(model_opt, source_vocab, source_feats)
    encoder = make_encoder(model_opt, source_emb)

    # Decoder side: embeddings built from the target vocab and its features.
    target_vocab = fields["tgt"].vocab
    target_feats = onmt.io.collect_feature_vocabs(fields, 'tgt')
    target_emb = make_embeddings(model_opt, target_vocab,
                                 target_feats, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if source_vocab != target_vocab:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        target_emb.word_lut.weight = source_emb.word_lut.weight

    decoder = make_decoder(model_opt, target_emb)

    # Make NMTModel(= encoder + decoder); fully reversible variant when both
    # halves are configured as 'Rev'.
    fully_reversible = (model_opt.decoder_model == 'Rev'
                        and model_opt.encoder_model == 'Rev')
    if fully_reversible:
        model = RevNMTModel(encoder, decoder, fields['tgt'].vocab,
                            opt=model_opt)
    else:
        model = NMTModel(encoder, decoder, opt=model_opt)
    model.model_type = model_opt.model_type

    # Load the model states from checkpoint or initialize them.
    if checkpoint is None:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            bound = model_opt.param_init
            for param in model.parameters():
                param.data.uniform_(-bound, bound)
    else:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])

    # Make the whole model leverage GPU if indicated to do so.
    model.cuda() if gpu else model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None, stage1=True,
                    basic_enc_dec=False):
    """Build the two-stage model: encoder + decoder + copy generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model gnerated by train phase, or a resumed snapshot
                    model from a stopped training.
        stage1(bool): selects the stage-1 fields (src1/tgt1) vs stage-2
                      (src2/tgt2); see the guard below.
        basic_enc_dec(bool): use the basic encoder-decoder configuration
                             (forces the src2/tgt2 fields).
    Returns:
        the NMTModel.
    """
    # NOTE(review): this configuration (stage1 without basic_enc_dec) appears
    # intentionally disabled — the assert fires before src1/tgt1 are used.
    # Under `python -O` the assert is stripped and src1/tgt1 WOULD be used;
    # confirm whether this branch should raise a proper error instead.
    if stage1 and not basic_enc_dec:
        assert False
        src = "src1"
        tgt = "tgt1"
    else:
        src = "src2"
        tgt = "tgt2"
    # Stage-1 history field feeds the hierarchical-history embeddings below.
    src_hist = "src1_hist" if (basic_enc_dec or stage1) else None

    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields[src].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, src)
        # Source embeddings also consume the src1 history vocab
        # (hierarchical history encoding).
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts,
                                         hist_dict=fields[src_hist].vocab,
                                         use_hier_hist=True)
        encoder = make_encoder(model_opt, src_embeddings, stage1,
                               basic_enc_dec)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields[tgt].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, tgt)
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings,
                           stage1 and not basic_enc_dec, basic_enc_dec)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.  Always a copy generator over the stage-2 target vocab.
    generator = CopyGenerator(model_opt.rnn_size,
                              fields["tgt2"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        # NOTE(review): earlier debug code here diffed the checkpoint's
        # state-dict keys against the freshly built model/generator to spot
        # missing/extra keys when loading — kept out of the hot path.
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the model: encoder + decoder + generator.

    Model and generator states are loaded/initialized by separate local
    helpers so that a checkpoint may supply the model but not the generator
    (the generator is then initialized fresh).

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.

    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            # BUGFIX: implicit softmax dim is deprecated (warns and deduces
            # the dim at runtime); dim=-1 matches the deduced dim for the 2D
            # generator input and the other model builders in this file.
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    def _init_model():
        if checkpoint is not None:
            print('Loading model parameters.')
            # NOTE(review): this is a module-level `load_state_dict` helper,
            # not the nn.Module method — presumably a tolerant loader that
            # accepts checkpoints missing the entity-attention keys.
            load_state_dict(model, checkpoint['model'])
            # Entity attention is new relative to older checkpoints: if its
            # weights are absent from the checkpoint, initialize them here.
            if model.encoder.ent_attn.linear_query.weight is not None \
                    and 'encoder.ent_attn.linear_query.weight' \
                    not in checkpoint['model']:
                init_val = model_opt.param_init
                model.encoder.ent_attn.linear_query.weight.data.uniform_(
                    -init_val, init_val)
                model.encoder.ent_attn.linear_out.bias.data.uniform_(
                    -init_val, init_val)
                model.encoder.ent_attn.linear_query.bias.data.uniform_(
                    -init_val, init_val)
                model.encoder.ent_attn.linear_context.weight.data.uniform_(
                    -init_val, init_val)
                model.encoder.ent_attn.v.weight.data.uniform_(
                    -init_val, init_val)
                model.encoder.ent_attn.linear_out.weight.data.uniform_(
                    -init_val, init_val)
        else:
            if model_opt.param_init != 0.0:
                print('Intializing model parameters.')
                for p in model.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
            if model_opt.param_init_glorot:
                for p in model.parameters():
                    if p.dim() > 1:
                        xavier_uniform(p)
            if hasattr(model.encoder, 'embeddings'):
                model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
            if hasattr(model.decoder, 'embeddings'):
                model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Load the gen states from checkpoint or initialize them.
    def _fill_generator(gen, name):
        if checkpoint is not None and name in checkpoint:
            print('Loading gen parameters.')
            gen.load_state_dict(checkpoint[name])
        else:
            if model_opt.param_init != 0.0:
                print('Intializing gen parameters.')
                for p in gen.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
            if model_opt.param_init_glorot:
                for p in gen.parameters():
                    if p.dim() > 1:
                        xavier_uniform(p)

    _init_model()
    _fill_generator(generator, 'generator')

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, src_dict, tgt_dict, gpu, checkpoint=None):
    """Build a text-only NMTModel (encoder + decoder + generator).

    Args:
        model_opt: the option namespace loaded from a checkpoint.
        src_dict: source-side vocabulary.
        tgt_dict: target-side vocabulary.
        gpu (bool): whether to move the model to GPU.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot of a stopped training run. When given, parameters are
            restored from it instead of being freshly initialized.

    Returns:
        the NMTModel with its generator attached.
    """
    assert model_opt.model_type in ["text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    src_embeddings = make_embeddings(model_opt, src_dict, for_encoder=True)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(nn.Linear(model_opt.rnn_size,
                                            len(tgt_dict)),
                                  nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding matrix.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, tgt_dict)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            # BUG FIX: message previously read "Intializing".
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        # Pretrained vectors (if configured) overwrite the random init
        # for the embedding weights only.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build a (possibly image-augmented) NMTModel from options.

    Args:
        model_opt: the option namespace loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to move the model to GPU.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot of a stopped training run.

    Returns:
        the NMTModel with its generator attached.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    # NOTE(review): the assert above also permits "img"/"audio", but no
    # branch defines `encoder`/`src_dict` for them, so those model types
    # would hit a NameError below — confirm whether only "text" is meant.

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    if model_opt.image_feat_type == 'local':
        # Use the local image features "as is": the encoder only reshapes
        # them, mapping feat_size to rnn_size through one linear layer.
        encoder_img = make_encoder_image_local_features(model_opt)
    elif model_opt.image_feat_type == 'global':
        # Transform global image features before using them.
        encoder_img = make_encoder_image_global_features(model_opt)
    else:
        # No image features requested.
        encoder_img = None

    # Make NMTModel(= encoder + decoder [+ image encoder]).
    model = NMTModel(encoder, decoder, encoder_img)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding matrix.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        # Pretrained vectors are applied after random init so they
        # overwrite it for the embedding weights.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_auto_model(model_opt, fields, gpu, checkpoint=None):
    """Build a pair of autoencoder NMT models, one per language side.

    Args:
        model_opt: the option namespace loaded from a checkpoint.
        fields: a pair ``(src_fields, tgt_fields)`` of `Field` dicts.
        gpu (bool): whether to move the models to GPU.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot of a stopped training run.

    Returns:
        (src_auto_model, tgt_auto_model): two NMTModel autoencoders, each
        with its generator attached.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    src_fields = fields[0]
    tgt_fields = fields[1]

    # Source-side autoencoder: encoder and decoder over the source vocab.
    src_dict = src_fields["src"].vocab
    feature_dicts = onmt.IO.collect_feature_dicts(src_fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    src_encoder = make_encoder(model_opt, src_embeddings)
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts,
                                     for_encoder=False)
    src_decoder = make_decoder(model_opt, src_embeddings)

    # Target-side autoencoder: encoder and decoder over the target vocab.
    tgt_dict = tgt_fields["src"].vocab
    feature_dicts = onmt.IO.collect_feature_dicts(tgt_fields, 'src')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts)
    tgt_encoder = make_encoder(model_opt, tgt_embeddings)
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)
    tgt_decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    src_auto_model = NMTModel(src_encoder, src_decoder)
    tgt_auto_model = NMTModel(tgt_encoder, tgt_decoder)

    # Make Generators.
    src_generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, len(src_fields["tgt"].vocab)),
        nn.LogSoftmax())
    tgt_generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, len(tgt_fields["tgt"].vocab)),
        nn.LogSoftmax())

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # JD TODO: FIX THIS!
        print('Loading model parameters.')
        src_auto_model.load_state_dict(checkpoint['src_auto_model'])
        tgt_auto_model.load_state_dict(checkpoint['tgt_auto_model'])
        src_generator.load_state_dict(checkpoint['src_generator'])
        tgt_generator.load_state_dict(checkpoint['tgt_generator'])
    else:
        if model_opt.param_init != 0.0:
            # BUG FIX: message previously read "Intializing".
            print('Initializing model parameters.')
            for p in src_encoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in src_decoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in tgt_encoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in tgt_decoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            # Pretrained vectors (if configured) overwrite the random
            # init for the embedding weights only.
            src_encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
            src_decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
            tgt_encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
            tgt_decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
            for p in src_generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in tgt_generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)

    # Add generator to model (this registers it as parameter of model).
    src_auto_model.generator = src_generator
    tgt_auto_model.generator = tgt_generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        src_auto_model.cuda()
        # BUG FIX: this previously called tgt_auto_model.cpu() even when
        # GPU use was requested, leaving the target model on the CPU.
        tgt_auto_model.cuda()
    else:
        src_auto_model.cpu()
        tgt_auto_model.cpu()

    return src_auto_model, tgt_auto_model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel whose target embeddings may be char-composed.

    Args:
        model_opt: the option namespace loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to move the model to GPU.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot of a stopped training run.

    Returns:
        the NMTModel with its generator attached.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        print('Collecting feature vocabularies...')
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        print('Building source embeddings...')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        print('Building an encoder...')
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    if model_opt.char_compos_type == 'none':
        # Plain word-level target embeddings.
        tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                         feature_dicts, for_encoder=False)
        print('Using standard embeddings')
    else:
        # Character-level composition: build per-composer configs from the
        # dash-separated `-char_compos_type` spec (e.g. "brnn-cnn").
        spells = getVocabSpell(tgt_dict, gpu)
        embedding_dim = model_opt.tgt_word_vec_size
        chanQty = model_opt.char_comp_cnn_chan_qty
        dropout = model_opt.dropout
        numLayers = model_opt.char_comp_rnn_layer
        configs = {}
        for compType in model_opt.char_compos_type.split('-'):
            if compType == 'brnn':
                configs['brnn'] = {
                    'numLayers': numLayers,
                    'embedSize': embedding_dim
                }
            elif compType == 'rnn':
                configs['rnn'] = {
                    'numLayers': numLayers,
                    'embedSize': embedding_dim
                }
            elif compType == 'cnn':
                configs['cnn'] = {
                    'chanQty': chanQty,
                    'embedSize': embedding_dim
                }
            elif compType == 'wembed':
                # Word-level embedding mixed into the composition.
                configs['wembed'] = {'embedSize': embedding_dim}
            else:
                raise Exception('Invalid composition type: ' + compType)
        tgt_embeddings = Char2VecComposite(
            spells,
            configs,
            charEmbedSize=model_opt.char_embed_size,
            dropout=dropout)
        print('Using char-level composition embeddings of type %s' %
              model_opt.char_compos_type)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding matrix.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            print('Loading pre-trained source vectors: %s/%s' %
                  (model_opt.pre_word_vecs_enc,
                   model_opt.fix_word_vecs_enc))
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        # Pretrained target vectors only make sense for word-level lookup
        # embeddings, hence the char_compos_type guard.
        # NOTE(review): the message below says "source vectors" but this
        # loads the decoder-side (target) vectors — copy-paste slip.
        if hasattr(model.decoder, 'embeddings') and model_opt.char_compos_type == 'none':
            print('Loading pre-trained source vectors: %s/%s' %
                  (model_opt.pre_word_vecs_dec,
                   model_opt.fix_word_vecs_dec))
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        print('Moving model to CUDA')
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel (encoder + decoder + generator) from options.

    Args:
        model_opt: the option namespace loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to move the model to GPU.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot of a stopped training run. When given, all parameters
            are restored from it; otherwise they are freshly initialized.

    Returns:
        the NMTModel with its generator attached.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding matrix.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            # BUG FIX: message previously read "Intializing".
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init only applies to matrices (dim > 1); biases keep
            # whatever the uniform init (or ctor default) gave them.
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        # Pretrained vectors are applied after random init so they
        # overwrite it for the embedding weights.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel, supporting partially matching checkpoints.

    When the checkpoint's parameter keys do not exactly match the freshly
    built model (e.g. an extended vocabulary), the overlapping parameters
    are restored and the remaining ones are re-initialized.

    Args:
        model_opt: the option namespace loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to move the model to GPU.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot of a stopped training run.

    Returns:
        the NMTModel with its generator attached.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding matrix.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        state = model.state_dict()
        if state.keys() == checkpoint["model"].keys():
            # Exact architecture match: plain restore.
            model.load_state_dict(checkpoint["model"])
        else:
            # Partial restore: overlay checkpoint values on the freshly
            # built state, then re-initialize the embedding parameters —
            # the ones expected to differ (e.g. a changed vocabulary).
            state.update(checkpoint["model"])
            model.load_state_dict(state)
            for name, param in model.named_parameters():
                if "embedding" in name:
                    if model_opt.param_init != 0.0:
                        param.data.uniform_(-model_opt.param_init,
                                            model_opt.param_init)
                    if model_opt.param_init_glorot:
                        if param.dim() > 1:
                            # BUG FIX: previously called xavier_uniform(p)
                            # with an undefined name `p` -> NameError.
                            xavier_uniform(param)
        state = generator.state_dict()
        if state.keys() == checkpoint["generator"].keys():
            generator.load_state_dict(checkpoint["generator"])
        else:
            # Same partial-restore strategy for the generator; the output
            # projection is re-initialized when the keys disagree.
            state.update(checkpoint["generator"])
            generator.load_state_dict(state)
            for name, param in generator.named_parameters():
                if "linear.bias" in name or "linear.weight" in name:
                    if model_opt.param_init != 0.0:
                        param.data.uniform_(-model_opt.param_init,
                                            model_opt.param_init)
                    if model_opt.param_init_glorot:
                        if param.dim() > 1:
                            # BUG FIX: same undefined-`p` NameError here.
                            xavier_uniform(param)
    else:
        if model_opt.param_init != 0.0:
            # BUG FIX: message previously read "Intializing".
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None, train_part="all"):
    """Build an NMTModel with a document-context component.

    Args:
        model_opt: the option namespace loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to move the model to GPU.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot of a stopped training run.
        train_part: "all" trains everything; "context" restores and
            freezes the main model and trains only the `doc_context`
            parameters.

    Returns:
        the NMTModel with its generator attached.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)
    context = make_context(model_opt, tgt_dict)

    # Make NMTModel(= encoder + decoder).
    if model_opt.RISK_ratio > 0.0:
        # RISK training decodes inside the model, so it needs the full set
        # of beam-search options plus a global scorer.
        scorer = onmt.translate.GNMTGlobalScorer(model_opt.alpha,
                                                 model_opt.beta,
                                                 model_opt.coverage_penalty,
                                                 model_opt.length_penalty)
        model = NMTModel(encoder, decoder, context,
                         context_type=model_opt.context_type,
                         tgt_vocab=fields['tgt'].vocab,
                         beam_size=model_opt.beam_size,
                         n_best=model_opt.n_best,
                         gpu=gpu,
                         scorer=scorer,
                         min_length=model_opt.min_length,
                         max_length=model_opt.max_length,
                         stepwise_penalty=model_opt.stepwise_penalty,
                         block_ngram_repeat=model_opt.block_ngram_repeat,
                         ignore_when_blocking=model_opt.ignore_when_blocking,
                         copy_attn=model_opt.copy_attn,
                         context_size=model_opt.context_size)
    else:
        model = NMTModel(encoder, decoder, context,
                         context_type=model_opt.context_type)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding matrix.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model_dict = checkpoint['model']
        if train_part == "context":
            # Restore only the main-model parameters; the doc_context part
            # keeps its fresh initialization.
            model_dict = model.state_dict()
            if 'join' in model_opt.context_type:
                # The "join" context wraps doc_context in a list, so the
                # old checkpoint keys need a `doc_context` -> `doc_context.0`
                # rename before they match the current module tree.
                pretrained_dict = {}
                for k, v in checkpoint['model'].items():
                    if k in model_dict:
                        if 'doc_context' in k:
                            k = k.replace('doc_context', 'doc_context.0')
                        pretrained_dict[k] = v
            else:
                pretrained_dict = {
                    k: v
                    for k, v in checkpoint['model'].items()
                    if k in model_dict and 'doc_context' not in k
                }
            model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict, strict=False)
        generator.load_state_dict(checkpoint['generator'])
        if train_part == "context":
            print("Freezing parameters of main model")
            for param in model.parameters():
                # BUG FIX: the autograd flag is `requires_grad`; the old
                # `require_grad` assignment just created an unused
                # attribute, so nothing was actually frozen.
                param.requires_grad = False
            for param in generator.parameters():
                param.requires_grad = False
            print("Unfreezing parameters of context")
            for param in model.doc_context.parameters():
                param.requires_grad = True
                # Context parameters are trained from scratch, so give
                # them the same initialization as a fresh model.
                if model_opt.param_init != 0.0:
                    param.data.uniform_(-model_opt.param_init,
                                        model_opt.param_init)
                if model_opt.param_init_glorot:
                    if param.dim() > 1:
                        xavier_uniform(param)
    else:
        if model_opt.param_init != 0.0:
            # BUG FIX: message previously read "Intializing".
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel or a multimodal variant, per `multimodal_type`.

    Args:
        model_opt: the option namespace loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to move the model to GPU.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot of a stopped training run.

    Returns:
        the model (NMTModel or a multimodal subclass) with its generator
        attached.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make Generator.
    use_multimodal_model = model_opt.multimodal_type is not None
    if use_multimodal_model and 'gm' in model_opt.multimodal_type:
        # "gm" variants concatenate the second-modality vector to the
        # decoder output, so the generator input grows by second_dim.
        generator_in_size = model_opt.rnn_size + model_opt.second_dim
    else:
        generator_in_size = model_opt.rnn_size
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(generator_in_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding matrix.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(generator_in_size, fields["tgt"].vocab)

    if use_multimodal_model:
        second_dim_in = model_opt.second_dim_in
        second_dim = model_opt.second_dim
        # Resolve the concrete multimodal model class for this type key.
        mmm_class = onmt.modules.MultiModalModel.multimodal_model_class_by_key(
            model_opt.multimodal_type)
        model = mmm_class(
            encoder=encoder,
            # Projects the second-modality features to second_dim with a
            # sigmoid squashing.
            second_encoder=nn.Sequential(nn.Linear(second_dim_in,
                                                   second_dim),
                                         nn.Sigmoid()),
            second_dim=second_dim,
            decoder=decoder,
            generator=None  # Assigned later
        )
    else:
        # Make NMTModel(= encoder + decoder).
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        # Pretrained vectors are applied after random init so they
        # overwrite it for the embedding weights.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the full NMT model (encoder + decoder + generator).

    Supports text/img/audio encoders and, in this fork, trigram-source
    and character-target decoder variants.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed
            snapshot model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder embeddings.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # BUGFIX: src_dict/src_embeddings are only bound in the "text"
        # branch above; for img/audio encoders this used to raise a
        # confusing NameError. Fail with an explicit message instead.
        if model_opt.model_type != "text":
            raise AssertionError('--share_embeddings requires a text '
                                 'encoder (model_type == "text").')
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # Make NMTModel(= encoder + decoder); the model class depends on the
    # encoder/decoder granularity. The charrnn decoder factory returns a
    # pair of decoders (word-level + char-level).
    if model_opt.encoder_type == "trigramrnn" and model_opt.decoder_type == "rnn":
        decoder = make_decoder(model_opt, tgt_embeddings)
        model = NMTSourceTrigramModel(encoder, decoder)
    elif model_opt.encoder_type == "brnn" and model_opt.decoder_type == "charrnn":
        [decoder1, decoder2] = make_decoder(model_opt, tgt_embeddings)
        model = NMTTargetCharModel(encoder, decoder1, decoder2)
    elif model_opt.encoder_type == "trigramrnn" and model_opt.decoder_type == "charrnn":
        [decoder1, decoder2] = make_decoder(model_opt, tgt_embeddings)
        model = CharNMTModel(encoder, decoder1, decoder2)
    else:
        decoder = make_decoder(model_opt, tgt_embeddings)
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator. dim=-1 matches the other model constructors in this
    # file; it is equivalent to dim=1 for the 2-D (tokens x vocab) input.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.pre_encoder:
            # Load a pretrained encoder state and freeze it.
            pretrained = torch.load(model_opt.pre_encoder)
            encoder_dict = {key: value
                            for key, value in pretrained['model'].items()
                            if key.startswith('encoder')}
            model_dict = model.state_dict()
            model_dict.update(encoder_dict)
            model.load_state_dict(model_dict)
            # NOTE(review): setting requires_grad on the Module is only a
            # marker attribute; the per-parameter loop below does the
            # actual freezing.
            model.encoder.requires_grad = False
            for p in model.encoder.parameters():
                p.requires_grad = False
        # Optionally overwrite embeddings with pretrained word vectors.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if model_opt.decoder_type == "charrnn":
            # Char models keep their word-level embeddings on decoder1.
            if hasattr(model.decoder1, 'embeddings'):
                model.decoder1.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
        else:
            if hasattr(model.decoder, 'embeddings'):
                model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(opt, model_opt, fields, checkpoint=None):
    """Build the NMT model (encoder + decoder + generator), legacy API.

    Args:
        opt: the option in current environment (used for GPU selection).
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        checkpoint: the snapshot model.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make Encoder.
    if model_opt.model_type == "text":
        src_vocab = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        feats_padding_idx = [feat_dict.stoi[onmt.IO.PAD_WORD]
                             for feat_dict in feature_dicts]
        num_feat_embeddings = [len(feat_dict)
                               for feat_dict in feature_dicts]
        src_embeddings = make_embeddings(
            model_opt, src_vocab.stoi[onmt.IO.PAD_WORD],
            feats_padding_idx, len(src_vocab),
            for_encoder=True,
            num_feat_embeddings=num_feat_embeddings)
        encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)

    # Make Decoder.
    tgt_vocab = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feats_padding_idx = []
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_vocab.stoi[onmt.IO.PAD_WORD],
                                     feats_padding_idx,
                                     len(tgt_vocab),
                                     for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= Encoder + Decoder).
    model = NMTModel(encoder, decoder)

    # Make Generator. BUGFIX: pass dim explicitly — LogSoftmax() without
    # `dim` relies on deprecated implicit-dim inference in PyTorch; -1
    # (the vocab axis) matches the other constructors in this file.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint.
    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Make the whole model leverage GPU if indicated to do so; `opt`
    # may carry either a `gpuid` list (training) or a `gpu` int
    # (translation).
    if hasattr(opt, 'gpuid'):
        cuda = len(opt.gpuid) >= 1
    elif hasattr(opt, 'gpu'):
        cuda = opt.gpu > -1
    else:
        cuda = False

    if cuda:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    # Register the generator on the model (makes it part of state_dict).
    model.generator = generator

    return model