def build_reranker_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the ReRankerModel.
    """
    assert model_opt.model_type in ["text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Build the reranker on top of the source embeddings.
    src_dict = fields["src"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
    src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
    reranker = build_reranker(model_opt, src_embeddings)
    reranker = onmt.models.ReRankerModel(reranker)

    device = torch.device("cuda" if gpu else "cpu")
    reranker.model_type = model_opt.model_type

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        reranker.load_state_dict(checkpoint['reranker'])
    else:
        if model_opt.param_init != 0.0:
            for p in reranker.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in reranker.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
        if hasattr(reranker.reranker, 'embeddings'):
            reranker.reranker.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)

    reranker.to(device)

    return reranker
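# A minimal, hypothetical sketch of how this builder might be called when
# resuming a pre-trained reranker. The checkpoint layout ('opt', 'vocab',
# 'reranker') and the `load_fields_from_vocab` helper are assumptions in the
# style of OpenNMT-py checkpoints; adapt to however this project saves them.
def _example_load_reranker(ckpt_path, use_gpu=True):
    checkpoint = torch.load(ckpt_path,
                            map_location=lambda storage, loc: storage)
    model_opt = checkpoint['opt']
    fields = inputters.load_fields_from_vocab(checkpoint['vocab'])
    reranker = build_reranker_model(model_opt, fields, use_gpu,
                                    checkpoint=checkpoint)
    reranker.eval()
    return reranker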
def build_base_model(model_opt, fields, gpu, checkpoint=None): """ Args: model_opt: the option loaded from checkpoint. fields: `Field` objects for the model. gpu(bool): whether to use gpu. checkpoint: the model gnerated by train phase, or a resumed snapshot model from a stopped training. Returns: the MemModel. """ # Build encoder. src_dict = fields["src"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'src') src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) encoder = DistractorEncoder( model_opt.rnn_type, model_opt.word_encoder_type, model_opt.sent_encoder_type, model_opt.question_init_type, model_opt.word_encoder_layers, model_opt.sent_encoder_layers, model_opt.question_init_layers, model_opt.rnn_size, model_opt.dropout, src_embeddings, model_opt.lambda_question, model_opt.lambda_answer) # Build decoder. tgt_dict = fields["tgt"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt') tgt_embeddings = build_embeddings(model_opt, tgt_dict, feature_dicts, for_encoder=False) # Share the embedding matrix - preprocess with share_vocab required. if model_opt.share_embeddings: # src/tgt vocab should be the same if `-share_vocab` is specified. if src_dict != tgt_dict: raise AssertionError('The `-share_vocab` should be set during ' 'preprocess if you use share_embeddings!') tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight bidirectional_encoder = True if model_opt.question_init_type == 'brnn' else False decoder = HierDecoder(model_opt.rnn_type, bidirectional_encoder, model_opt.dec_layers, model_opt.rnn_size, model_opt.global_attention, model_opt.dropout, tgt_embeddings) # Build ans_decoder. ans_dict = fields["answer"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'answer') ans_embeddings = build_embeddings(model_opt, ans_dict, feature_dicts, for_encoder=False) # Share the embedding matrix - preprocess with share_vocab required. if model_opt.share_embeddings: # src/tgt vocab should be the same if `-share_vocab` is specified. if src_dict != ans_dict: raise AssertionError('The `-share_vocab` should be set during ' 'preprocess if you use share_embeddings!') ans_embeddings.word_lut.weight = src_embeddings.word_lut.weight ans_bidirectional_encoder = True if model_opt.question_init_type == 'brnn' else False ans_decoder = HierDecoderAns(model_opt.rnn_type, ans_bidirectional_encoder, model_opt.dec_layers, model_opt.rnn_size, model_opt.global_attention, model_opt.dropout, ans_embeddings) # Build NMTModel(= encoder + decoder). device = torch.device("cuda" if gpu else "cpu") model = DGModel(encoder, decoder, ans_decoder) # Build Generator. gen_func = nn.LogSoftmax(dim=-1) generator = nn.Sequential( nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)), gen_func) # Build AnsGenerator. ans_gen_func = nn.LogSoftmax(dim=-1) ans_generator = nn.Sequential( nn.Linear(model_opt.dec_rnn_size, len(fields["answer"].vocab)), ans_gen_func) # Load the model states from checkpoint or initialize them. 
if checkpoint is not None: model.load_state_dict(checkpoint['model'], strict=False) generator.load_state_dict(checkpoint['generator'], strict=False) ans_generator.load_state_dict(checkpoint['ans_generator'], strict=False) else: if model_opt.param_init != 0.0: for p in model.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) for p in generator.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) if model_opt.param_init_glorot: for p in model.parameters(): if p.dim() > 1: xavier_uniform_(p) for p in generator.parameters(): if p.dim() > 1: xavier_uniform_(p) if hasattr(model.encoder, 'embeddings'): model.encoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc) if hasattr(model.decoder, 'embeddings'): model.decoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec) if hasattr(model.ans_decoder, 'embeddings'): model.ans_decoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec) # Add generator to model (this registers it as parameter of model). model.generator = generator model.ans_generator = ans_generator model.to(device) return model
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size
        if model_opt.model_type == 'text' and \
                model_opt.enc_rnn_size != model_opt.dec_rnn_size:
            raise AssertionError("""We do not support different encoder and
                                 decoder rnn sizes for translation now.""")

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)

    # Build the (optional) reference encoder.
    if model_opt.refer:
        ref_dict = fields["ref"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'ref')
        ref_embeddings = build_embeddings(model_opt, ref_dict, feature_dicts)
        refer = build_encoder(model_opt, ref_embeddings)
    else:
        refer = None

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt, tgt_dict,
                                      feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder, refer)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
            gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using custom layernorm.
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = \
            {fix_key(k): v for (k, v) in checkpoint['model'].items()}
        # end of patch for backward compatibility
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the MemModel.
    """
    # Build encoder.
    src_dict = fields["src"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
    src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
    encoder = build_encoder(model_opt, src_embeddings)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt, tgt_dict,
                                      feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build MemModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.MemModel(encoder, decoder)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
            gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    elif model_opt.coref_vocab or model_opt.coref_attn:
        generator = CorefGenerator(model_opt.dec_rnn_size,
                                   fields["tgt"].vocab,
                                   fields["coref_tgt"].vocab)
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
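# Every builder above caps the generator with LogSoftmax(dim=-1), so it maps a
# decoder hidden state directly to per-token log-probabilities over the target
# vocabulary. A tiny standalone sketch of that projection (sizes are arbitrary,
# not taken from any config in this file):
def _demo_generator_projection():
    import torch
    import torch.nn as nn

    dec_rnn_size, tgt_vocab_size = 64, 500
    generator = nn.Sequential(
        nn.Linear(dec_rnn_size, tgt_vocab_size),
        nn.LogSoftmax(dim=-1))

    hidden = torch.randn(10, dec_rnn_size)   # e.g. 10 decoder time steps
    log_probs = generator(hidden)            # shape (10, tgt_vocab_size)
    assert torch.allclose(log_probs.exp().sum(-1), torch.ones(10))
    return log_probs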
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt, tgt_dict,
                                      feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Build Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

        if model_opt.share_embeddings:
            assert model.encoder.embeddings.word_lut.weight \
                is model.decoder.embeddings.word_lut.weight

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
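# Several variants tie the generator's projection matrix to the decoder's word
# look-up table when `-share_decoder_embeddings` is set. The assignment below
# makes both modules share a single Parameter, which is what
# `generator[0].weight = decoder.embeddings.word_lut.weight` does above
# (module names here are illustrative, not from this codebase):
def _demo_decoder_embedding_tying():
    import torch.nn as nn

    vocab_size, hidden_size = 100, 32
    word_lut = nn.Embedding(vocab_size, hidden_size)   # weight: (vocab, hidden)
    projection = nn.Linear(hidden_size, vocab_size)    # weight: (vocab, hidden)

    projection.weight = word_lut.weight                # tie: one shared tensor
    assert projection.weight.data_ptr() == word_lut.weight.data_ptr()
    return projection, word_lut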
def build_end2end_model(model_opt, fields, gpu, checkpoint=None, sel_checkpoint=None, s2s_gen_checkpoint=None): """ Args: model_opt: the option loaded from checkpoint. fields: `Field` objects for the model. gpu(bool): whether to use gpu. checkpoint: the model gnerated by train phase, or a resumed snapshot model from a stopped training. sel_checkpoint: the model gnerated by selector pre-train phase. Returns: the E2EModel. """ assert model_opt.model_type in ["text"], \ ("Unsupported model type %s" % (model_opt.model_type)) # Build selector. src_dict = fields["src"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'src') sel_src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) selector = build_selector(model_opt, sel_src_embeddings) # Build encoder if model_opt.e2e_type == "separate_enc_sel": if model_opt.selector_share_embeddings: # the shared embeddings are in the encoder.embeddings # TODO: change the state name to load the embeddings in the pretrained selector embeddings assert model_opt.load_pretrained_selector_from == '' src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) src_embeddings.word_lut.weight = sel_src_embeddings.word_lut.weight else: src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) encoder = build_encoder(model_opt, src_embeddings) else: # model_opt.e2e_type == "share_enc_sel" src_embeddings = sel_src_embeddings encoder = None # Build decoder. tgt_dict = fields["tgt"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt') tgt_embeddings = build_embeddings(model_opt, tgt_dict, feature_dicts, for_encoder=False) # Share the embedding matrix - preprocess with share_vocab required. if model_opt.share_embeddings: # src/tgt vocab should be the same if `-share_vocab` is specified. if src_dict != tgt_dict: raise AssertionError('The `-share_vocab` should be set during ' 'preprocess if you use share_embeddings!') tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight decoder = build_decoder(model_opt, tgt_embeddings) # Build E2EModel(= encoder + selector + decoder). device = torch.device("cuda" if gpu else "cpu") model = onmt.models.E2EModel(encoder, selector, decoder, e2e_type=model_opt.e2e_type, use_gt_sel_probs=model_opt.use_gt_sel_probs) model.model_type = model_opt.model_type # Build Generator. if not model_opt.copy_attn: generator = nn.Sequential( nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)), nn.LogSoftmax(dim=-1)) if model_opt.share_decoder_embeddings: generator[0].weight = decoder.embeddings.word_lut.weight else: generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab) # Load the model states from checkpoint or initialize them. 
if checkpoint is not None: model.load_state_dict(checkpoint['end2end_model']) generator.load_state_dict(checkpoint['generator']) else: if model_opt.param_init != 0.0: for p in model.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) for p in generator.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) if model_opt.param_init_glorot: for p in model.parameters(): if p.dim() > 1: xavier_uniform_(p) for p in generator.parameters(): if p.dim() > 1: xavier_uniform_(p) if sel_checkpoint is not None: model.load_state_dict(sel_checkpoint['selector'], strict=False) if s2s_gen_checkpoint is not None: model.load_state_dict(s2s_gen_checkpoint['model'], strict=False) generator.load_state_dict(s2s_gen_checkpoint['generator']) # if hasattr(model.encoder, 'embeddings'): # model.encoder.embeddings.load_pretrained_vectors( # model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc) # if hasattr(model.decoder, 'embeddings'): # model.decoder.embeddings.load_pretrained_vectors( # model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec) # Add generator to model (this registers it as parameter of model). model.generator = generator model.to(device) return model
def build_base_model(model_opt, fields, gpu, checkpoint=None): """ Args: model_opt: the option loaded from checkpoint. fields: `Field` objects for the model. gpu(bool): whether to use gpu. checkpoint: the model gnerated by train phase, or a resumed snapshot model from a stopped training. Returns: the NMTModel. """ assert model_opt.model_type in ["text", "img", "audio"], \ ("Unsupported model type %s" % (model_opt.model_type)) # Build encoder. if model_opt.model_type == "text": src_dict = fields["src"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'src') src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) encoder = build_encoder( model_opt, src_embeddings ) # we added additional encoder: TransformerEncoderLM elif model_opt.model_type == "img": if ("image_channel_size" not in model_opt.__dict__): image_channel_size = 3 else: image_channel_size = model_opt.image_channel_size encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn, model_opt.enc_rnn_size, model_opt.dropout, image_channel_size) elif model_opt.model_type == "audio": encoder = AudioEncoder(model_opt.rnn_type, model_opt.enc_layers, model_opt.dec_layers, model_opt.brnn, model_opt.enc_rnn_size, model_opt.dec_rnn_size, model_opt.audio_enc_pooling, model_opt.dropout, model_opt.sample_rate, model_opt.window_size) # Build decoder. tgt_dict = fields["tgt"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt') tgt_embeddings = build_embeddings(model_opt, tgt_dict, feature_dicts, for_encoder=False) # Share the embedding matrix - preprocess with share_vocab required. if model_opt.share_embeddings: # src/tgt vocab should be the same if `-share_vocab` is specified. if src_dict != tgt_dict: raise AssertionError('The `-share_vocab` should be set during ' 'preprocess if you use share_embeddings!') tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight decoder = build_decoder(model_opt, tgt_embeddings) lm_aux = model_opt.encoder_type == "transformerAuxLTR" # Build NMTModel(= encoder + decoder). device = torch.device("cuda" if gpu else "cpu") # the model will return more stuff model = onmt.models.NMTModel(encoder, decoder, lm_aux=lm_aux) # Build Generator. # Hmmm...generator is just hidden states -> word in vocab # since we use shared embedding between encoder and decoder..plus shared embedding between # decoder src to tgt... if not model_opt.copy_attn: if model_opt.generator_function == "sparsemax": gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1) else: gen_func = nn.LogSoftmax(dim=-1) generator = nn.Sequential( nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)), gen_func) if model_opt.share_decoder_embeddings: generator[0].weight = decoder.embeddings.word_lut.weight else: generator = CopyGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab) # Build Source Generator # not considering copy attention right now if lm_aux: if model_opt.generator_function == "sparsemax": gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1) else: gen_func = nn.LogSoftmax(dim=-1) # source vocab does not have <s> </s>, but share_vocab might be different... src_generator = nn.Sequential( nn.Linear(model_opt.enc_rnn_size, len(fields["src"].vocab)), gen_func) # this would have made sure that both encoder and decoder share the same generator if model_opt.share_decoder_embeddings: src_generator[0].weight = src_embeddings.word_lut.weight # Load the model states from checkpoint or initialize them. 
if checkpoint is not None: model.load_state_dict(checkpoint['model'], strict=False) generator.load_state_dict(checkpoint['generator'], strict=False) # if lm_aux: # src_generator.load_state_dict(checkpoint['src_generator'], strict=False) else: if model_opt.param_init != 0.0: for p in model.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) for p in generator.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) if lm_aux: for p in src_generator.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) if model_opt.param_init_glorot: for p in model.parameters(): if p.dim() > 1: xavier_uniform_(p) for p in generator.parameters(): if p.dim() > 1: xavier_uniform_(p) if lm_aux: for p in src_generator.parameters(): if p.dim() > 1: xavier_uniform_(p) if hasattr(model.encoder, 'embeddings'): model.encoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc) if hasattr(model.decoder, 'embeddings'): model.decoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec) # Add generator to model (this registers it as parameter of model). model.generator = generator if lm_aux: model.src_generator = src_generator model.to(device) return model
def build_base_model(model_opt, fields, gpu, checkpoint=None): """ Args: model_opt: the option loaded from checkpoint. fields: `Field` objects for the model. gpu(bool): whether to use gpu. checkpoint: the model gnerated by train phase, or a resumed snapshot model from a stopped training. Returns: the NMTModel. """ assert model_opt.model_type in ["text", "img", "audio"], \ ("Unsupported model type %s" % (model_opt.model_type)) # Build encoder. src_dict = fields["src"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'src') src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) encoder = build_encoder(model_opt, src_embeddings) # Build decoder. tgt_dict = fields["tgt"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt') tgt_embeddings = build_embeddings(model_opt, tgt_dict, feature_dicts, for_encoder=False) # Share the embedding matrix - preprocess with share_vocab required. if model_opt.share_embeddings: # src/tgt vocab should be the same if `-share_vocab` is specified. if src_dict != tgt_dict: raise AssertionError('The `-share_vocab` should be set during ' 'preprocess if you use share_embeddings!') tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight decoder = build_decoder(model_opt, tgt_embeddings) # Build NMTModel(= [session encoder] + encoder + decoder). device = torch.device("cuda" if gpu else "cpu") if model_opt.experiment == 'session': # Build Session Encoder. item_embeddings = build_embeddings(model_opt, fields["src_item_sku"].vocab, [], for_user=True) user_log_embeddings = build_embeddings(model_opt, fields["src_user_log"].vocab, [], for_user=True) user_op_embeddings = build_embeddings(model_opt, fields["src_operator"].vocab, [], for_user=True) user_site_cy_embeddings = build_embeddings(model_opt, fields["src_site_cy"].vocab, [], for_user=True) user_site_pro_embeddings = build_embeddings( model_opt, fields["src_site_pro"].vocab, [], for_user=True) user_site_ct_embeddings = build_embeddings(model_opt, fields["src_site_ct"].vocab, [], for_user=True) session_encoder = SessionEncoder(item_embeddings, user_log_embeddings, user_op_embeddings, user_site_cy_embeddings, user_site_pro_embeddings, user_site_ct_embeddings) else: session_encoder = None model = onmt.models.NMTModel(session_encoder, encoder, decoder) model.model_type = model_opt.model_type # Build Generator. Copy Generator. generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab, model_opt.session_weight, model_opt.explanation_weight) # Load the model states from checkpoint or initialize them. if checkpoint is not None: model.load_state_dict(checkpoint['model']) generator.load_state_dict(checkpoint['generator']) else: if model_opt.param_init != 0.0: for p in model.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) for p in generator.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) if model_opt.param_init_glorot: for p in model.parameters(): if p.dim() > 1: xavier_uniform_(p) for p in generator.parameters(): if p.dim() > 1: xavier_uniform_(p) if hasattr(model.encoder, 'embeddings'): model.encoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc) if hasattr(model.decoder, 'embeddings'): model.decoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec) # Add generator to model (this registers it as parameter of model). model.generator = generator model.to(device) return model
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size
        if model_opt.model_type == 'text' and \
                model_opt.enc_rnn_size != model_opt.dec_rnn_size:
            raise AssertionError("""We do not support different encoder and
                                 decoder rnn sizes for translation now.""")

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        if "image_channel_size" not in model_opt.__dict__:
            image_channel_size = 3
        else:
            image_channel_size = model_opt.image_channel_size
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.enc_rnn_size,
                               model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.rnn_type,
                               model_opt.enc_layers,
                               model_opt.dec_layers,
                               model_opt.brnn,
                               model_opt.enc_rnn_size,
                               model_opt.dec_rnn_size,
                               model_opt.audio_enc_pooling,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt, tgt_dict,
                                      feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # The generator function only matters at translation time, so it is not
    # necessary to create it here anymore. At translation time, the model's
    # model_opt will still have a value for generator_function or loss_alpha,
    # which is sufficient to figure out what function to use.
    generator = nn.Sequential(
        nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)))
    if model_opt.share_decoder_embeddings:
        generator[0].weight = decoder.embeddings.word_lut.weight

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using custom layernorm.
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = \
            {fix_key(k): v for (k, v) in checkpoint['model'].items()}
        # end of patch for backward compatibility
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
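# What the `fix_key` substitutions above actually do: pre-refactor Transformer
# checkpoints stored the LayerNorm gain/bias as `a_2`/`b_2`, and the regexes
# rename them to the current `weight`/`bias` parameter names. The sample key
# below is made up for illustration:
def _demo_fix_key():
    import re

    old_key = 'decoder.transformer_layers.0.layer_norm_1.a_2'
    new_key = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                     r'\1.layer_norm\2.weight', old_key)
    assert new_key == 'decoder.transformer_layers.0.layer_norm_1.weight'
    return new_key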
def build_base_model(model_opt, fields, gpu, length_model, length_penalty_a, length_penalty_b, checkpoint=None): """ Args: model_opt: the option loaded from checkpoint. fields: `Field` objects for the model. gpu(bool): whether to use gpu. checkpoint: the model gnerated by train phase, or a resumed snapshot model from a stopped training. Returns: the NMTModel. """ assert model_opt.model_type in ["text", "img", "audio"], \ ("Unsupported model type %s" % (model_opt.model_type)) # for backward compatibility if model_opt.rnn_size != -1: model_opt.enc_rnn_size = model_opt.rnn_size model_opt.dec_rnn_size = model_opt.rnn_size if model_opt.model_type == 'text' and \ model_opt.enc_rnn_size != model_opt.dec_rnn_size: raise AssertionError("""We do not support different encoder and decoder rnn sizes for translation now.""") # Build encoder. if model_opt.model_type == "text": src_dict = fields["src"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'src') src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) encoder = build_encoder(model_opt, src_embeddings) elif model_opt.model_type == "img": if ("image_channel_size" not in model_opt.__dict__): image_channel_size = 3 else: image_channel_size = model_opt.image_channel_size encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn, model_opt.enc_rnn_size, model_opt.dropout, image_channel_size) elif model_opt.model_type == "audio": encoder = AudioEncoder(model_opt.rnn_type, model_opt.enc_layers, model_opt.dec_layers, model_opt.brnn, model_opt.enc_rnn_size, model_opt.dec_rnn_size, model_opt.audio_enc_pooling, model_opt.dropout, model_opt.sample_rate, model_opt.window_size) # Build decoder. tgt_dict = fields["tgt"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt') tgt_embeddings = build_embeddings(model_opt, tgt_dict, feature_dicts, for_encoder=False) # Share the embedding matrix - preprocess with share_vocab required. if model_opt.share_embeddings: # src/tgt vocab should be the same if `-share_vocab` is specified. if src_dict != tgt_dict: raise AssertionError('The `-share_vocab` should be set during ' 'preprocess if you use share_embeddings!') tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight decoder = build_decoder(model_opt, tgt_embeddings) # Build NMTModel(= encoder + decoder). device = torch.device("cuda" if gpu else "cpu") model = onmt.models.NMTModel(encoder, decoder) # Build Generator. 
if not model_opt.copy_attn: if model_opt.generator_function == "sparsemax": gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1) else: gen_func = nn.LogSoftmax(dim=-1) # generator = nn.Sequential( # nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)), # gen_func # ) # MMM class tune_out_prob(nn.Module): def __init__(self): super(tune_out_prob, self).__init__() self.t_lens = None self.eos_ind = None self.batch_max_len = None self.word_index = None self.tgt_vocab_size = None self.validation = False def length_model_loss(self, scale, value, a, b): #return -(value / scale) ** 2 - scale.log() #return -((value / scale) **2)/2 - (2.5066*scale).log() return -a * (value / scale)**2 + b #*abs(scale) # return -((value / scale) ** 2)*scale + scale #return -(value / scale)*4 + scale def forward(self, x): y = x.clone() #mask = np.ones(x.size()) # for i in range(self.t_lens.size(-1)): # y[i*self.batch_size + self.t_lens[i], self.eos_ind] = \ # y[i * self.batch_size + self.t_lens[i], self.eos_ind].clone() + math.log(0.9) if self.training or self.validation: # training phase y = y.view(self.batch_max_len, -1, self.tgt_vocab_size) # eos_list = [(i * self.batch_max_len + self.t_lens.data.cpu().numpy()[i]) for i in # range(self.t_lens.size(-1))] # other_list = list(set(list(range(x.size(0)))) - set(eos_list)) # y[other_list, self.eos_ind] = -100 # y[eos_list, self.eos_ind] = 0 for wi in range(self.batch_max_len): delta_p = (self.t_lens - wi - 1).float() delta_p[delta_p < 0] = 0.05 * delta_p[delta_p < 0] scale = (self.t_lens.float()).sqrt() / 2.0 penalties = self.length_model_loss( scale, delta_p, length_penalty_a, length_penalty_b) #penalties[penalties > 0] = 0 y[wi, :, self.eos_ind] += penalties y = y.view(-1, self.tgt_vocab_size) #mask[eos_list, self.eos_ind] = +2 #mask[other_list, self.eos_ind] = -2 else: # translation phase if len( x.size() ) == 3: # x of shape [ tgt_len, batch_size, vocab ] is a full sentence # for i in range(len(self.t_lens)): # other_list = list(set(list(range(x.size(0)))) - set(list([self.t_lens.data.cpu().numpy()[i]]))) # #mask[other_list, i, self.eos_ind] = -2 # y[other_list, i, self.eos_ind] = -100 # if self.t_lens[i] < x.size(0): # #mask[self.t_lens[i], i, self.eos_ind] = +2 # y[self.t_lens[i], i, self.eos_ind] = 0 pass else: # x of shape [(batch_size x beam_size) , vocab ] is only for one step beam_size = x.size(0) // self.t_lens.numel() wi = self.word_index delta_p = (self.t_lens - wi - 2).float() delta_p[delta_p < 0] = 0.005 * delta_p[delta_p < 0] delta_p = delta_p.unsqueeze(1).expand( self.t_lens.numel(), beam_size).flatten() scale = (self.t_lens.float()).sqrt() / 2.0 scale = scale.unsqueeze(1).expand( self.t_lens.numel(), beam_size).flatten() penalties = self.length_model_loss( scale, delta_p, length_penalty_a, length_penalty_b) #penalties[penalties > 0] = 0 y[:, self.eos_ind] += penalties #y[eos_list ^ 1, self.eos_ind] = -100 return y #mask = torch.tensor(mask, dtype=x.dtype).to(device) #x= x+mask #return x # y = x.clone() # # 1. since y is the output of log_softmax, apply exponential # # to convert it to probabilistic form # y = torch.exp(y) # # 2. tune probabilities # eos_list = [(i * self.batch_max_len + self.t_lens.data.cpu().numpy()[i]) for i in # range(self.t_lens.size(-1))] # other_list = list(set(list(range(y.size(0)))) - set(eos_list)) # # z = y.clone() # # 2.1. tune probabilities for eos positions # z[eos_list, self.eos_ind] = 1 # z[eos_list, 0:self.eos_ind] = 0 # z[eos_list, self.eos_ind+1:-1] = 0 # # # 2.2. 
tune probabilities for non-eos positions # p_val = z[other_list, self.eos_ind] / (self.tgt_vocab_size - 1) # z[other_list, self.eos_ind] = 0 # non_eos_inds = list(set(list(range(self.tgt_vocab_size))) - set([self.eos_ind])) # for i in range(len(other_list)): # z[other_list[i], non_eos_inds] = y[other_list[i], non_eos_inds] + p_val[i] # # # 3. convert y back to log-probability form # z = torch.log(z) # return z # MMM if length_model == 'oracle' or length_model == 'fixed_ratio' or length_model == 'lstm': generator = nn.Sequential( nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)), gen_func, tune_out_prob()) else: generator = nn.Sequential( nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)), gen_func) # generator = nn.Sequential( # nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)), # gen_func # ) if model_opt.share_decoder_embeddings: generator[0].weight = decoder.embeddings.word_lut.weight else: generator = CopyGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab) # Load the model states from checkpoint or initialize them. if checkpoint is not None: # This preserves backward-compat for models using customed layernorm def fix_key(s): s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2', r'\1.layer_norm\2.bias', s) s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2', r'\1.layer_norm\2.weight', s) return s checkpoint['model'] = \ {fix_key(k): v for (k, v) in checkpoint['model'].items()} # end of patch for backward compatibility model.load_state_dict(checkpoint['model'], strict=False) generator.load_state_dict(checkpoint['generator'], strict=False) else: if model_opt.param_init != 0.0: for p in model.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) for p in generator.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) if model_opt.param_init_glorot: for p in model.parameters(): if p.dim() > 1: xavier_uniform_(p) for p in generator.parameters(): if p.dim() > 1: xavier_uniform_(p) if hasattr(model.encoder, 'embeddings'): model.encoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc) if hasattr(model.decoder, 'embeddings'): model.decoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec) # Add generator to model (this registers it as parameter of model). model.generator = generator model.to(device) return model
def build_base_model(model_opt, fields, gpu, checkpoint=None): """ Args: model_opt: the option loaded from checkpoint. fields: `Field` objects for the model. gpu(bool): whether to use gpu. checkpoint: the model gnerated by train phase, or a resumed snapshot model from a stopped training. Returns: the NMTModel. """ assert model_opt.model_type == "text", ("Unsupported model type %s" % (model_opt.model_type)) # Build encoder. logger.info("build_base_model") if model_opt.model_type == "text": src_dict = fields[ "src"].vocab # torchtext.vocab.Vocab object: dict_keys(['vectors', 'stoi', 'freqs', 'itos']) feature_dicts = inputters.collect_feature_vocabs(fields, 'src') # list: [] src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) ''' Embeddings( (make_embedding): Sequential( (emb_luts): Elementwise( (0): Embedding(24997, 500, padding_idx=1) ) ) ) ''' #logger.info("src embeddings") #logger.info(src_embeddings) logger.info("bulding question encoder") encoder = build_encoder(model_opt, src_embeddings) logger.info(encoder) ############### Modified ############################### ans_dict = fields["ans"].vocab ans_embeddings = build_embeddings(model_opt, ans_dict, feature_dicts) logger.info("building answer encoder") encoder_ans = build_encoder(model_opt, ans_embeddings) logger.info(encoder_ans) ##########################################################s # Build decoder. tgt_dict = fields["tgt"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt') tgt_embeddings = build_embeddings(model_opt, tgt_dict, feature_dicts, for_encoder=False) # Share the embedding matrix - preprocess with share_vocab required. if model_opt.share_embeddings: # src/tgt vocab should be the same if `-share_vocab` is specified. if src_dict != tgt_dict: raise AssertionError('The `-share_vocab` should be set during ' 'preprocess if you use share_embeddings!') tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight decoder = build_decoder(model_opt, tgt_embeddings) # Build NMTModel(= encoder + decoder). device = torch.device("cuda" if gpu else "cpu") model = NMTModel(encoder, encoder_ans, decoder) model.model_type = model_opt.model_type # Build Generator. if not model_opt.copy_attn: generator = nn.Sequential( nn.Linear(model_opt.rnn_size * 2, len(fields["tgt"].vocab)), nn.LogSoftmax(dim=-1)) if model_opt.share_decoder_embeddings: generator[0].weight = decoder.embeddings.word_lut.weight else: generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab) # Load the model states from checkpoint or initialize them. if checkpoint is not None: model.load_state_dict(checkpoint['model']) generator.load_state_dict(checkpoint['generator']) else: if model_opt.param_init != 0.0: for p in model.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) for p in generator.parameters(): p.data.uniform_(-model_opt.param_init, model_opt.param_init) if model_opt.param_init_glorot: for p in model.parameters(): if p.dim() > 1: xavier_uniform_(p) for p in generator.parameters(): if p.dim() > 1: xavier_uniform_(p) if hasattr(model.encoder, 'embeddings'): model.encoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc) if hasattr(model.decoder, 'embeddings'): model.decoder.embeddings.load_pretrained_vectors( model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec) # Add generator to model (this registers it as parameter of model). model.generator = generator model.to(device) return model
def build_base_model(model_opt, fields, gpu, FeatureValues, FeatureTensors, FeatureTypes, FeaturesList, FeatureNames, FTInfos, FeatureTypesNames, SimulationLanguages, checkpoint=None): """ Args: model_opt: the option loaded from checkpoint. fields: `Field` objects for the model. gpu(bool): whether to use gpu. WALS info checkpoint: the model gnerated by train phase, or a resumed snapshot model from a stopped training. Returns: the NMTModel. """ assert model_opt.model_type in ["text", "img", "audio"], \ ("Unsupported model type %s" % (model_opt.model_type)) # Build encoder. if model_opt.model_type == "text": src_dict = fields["src"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'src') src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) encoder = build_encoder(model_opt, src_embeddings) elif model_opt.model_type == "img": if ("image_channel_size" not in model_opt.__dict__): image_channel_size = 3 else: image_channel_size = model_opt.image_channel_size encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn, model_opt.rnn_size, model_opt.dropout, image_channel_size) elif model_opt.model_type == "audio": encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn, model_opt.rnn_size, model_opt.dropout, model_opt.sample_rate, model_opt.window_size) # Build decoder. tgt_dict = fields["tgt"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt') tgt_embeddings = build_embeddings(model_opt, tgt_dict, feature_dicts, for_encoder=False) # Share the embedding matrix - preprocess with share_vocab required. if model_opt.share_embeddings: # src/tgt vocab should be the same if `-share_vocab` is specified. if src_dict != tgt_dict: raise AssertionError('The `-share_vocab` should be set during ' 'preprocess if you use share_embeddings!') tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight decoder = build_decoder(model_opt, tgt_embeddings) # Wals print( 'Building embeddings for each WALS feature and MLP models for each feature type...' ) embeddings_list, embeddings_keys, mlp_list, mlp_keys = [], [], [], [] for FeatureType in FeatureTypes: list_features = FeatureType[1] for Feature in list_features: globals()['embedding_%s' % Feature] = build_feature_embeddings( gpu, FeatureTensors, FeaturesList, FeatureNames, Feature) # 192 embedding structures, one for each feature. embeddings_keys.append(Feature) embeddings_list.append(globals()['embedding_%s' % Feature]) globals()['mlp_%s' % FeatureType[0]] = build_mlp_feature_type( model_opt, FTInfos, FeatureTypesNames, FeatureType[0]) # 11 MLPs, one for each feature type. mlp_keys.append(FeatureType[0]) mlp_list.append(globals()['mlp_%s' % FeatureType[0]]) embeddings_dic_keys = dict(zip(embeddings_keys, embeddings_list)) EmbeddingFeatures = nn.ModuleDict(embeddings_dic_keys) mlp_dic_keys = dict(zip(mlp_keys, mlp_list)) # Build NMTModel(= encoder + decoder). device = torch.device("cuda" if gpu else "cpu") if model_opt.wals_model == 'EncInitHidden_Target': MLP2RNNHiddenSize_Target = build_mlp2rnnhiddensize_target( model_opt, FTInfos) print('Embeddings for WALS features and MLP models are built!') model = EncoderInitialization(model_opt.wals_model, encoder, decoder, MLP2RNNHiddenSize_Target, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: uses WALS features from the target language to initialize encoder's hidden state." 
) elif model_opt.wals_model == 'EncInitHidden_Both': MLP2RNNHiddenSize_Both = build_mlp2rnnhiddensize_both( model_opt, FTInfos) print('Embeddings for WALS features and MLP models are built!') model = EncoderInitialization(model_opt.wals_model, encoder, decoder, MLP2RNNHiddenSize_Both, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: uses WALS features from the source and target languages to initialize encoder's hidden state." ) elif model_opt.wals_model == 'DecInitHidden_Target': MLP2RNNHiddenSize_Target = build_mlp2rnnhiddensize_target( model_opt, FTInfos) print('Embeddings for WALS features and MLP models are built!') model = DecoderInitialization(model_opt.wals_model, encoder, decoder, MLP2RNNHiddenSize_Target, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: adds WALS features from the target language to the encoder's output to initialize decoder's hidden state." ) elif model_opt.wals_model == 'DecInitHidden_Both': MLP2RNNHiddenSize_Both = build_mlp2rnnhiddensize_both( model_opt, FTInfos) print('Embeddings for WALS features and MLP models are built!') model = DecoderInitialization(model_opt.wals_model, encoder, decoder, MLP2RNNHiddenSize_Both, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: adds WALS features from the source and target languages to the encoder's output to initialize decoder's hidden state." ) elif model_opt.wals_model == 'WalstoSource_Target': MLP2WALSHiddenSize_Target = build_mlp2walshiddensize_target( model_opt, FTInfos) print('Embeddings for WALS features and MLP models are built!') model = CombineWalsSourceWords(model_opt.wals_model, encoder, decoder, MLP2WALSHiddenSize_Target, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: concatenates WALS features from the target language to source words embeddings." ) elif model_opt.wals_model == 'WalstoSource_Both': MLP2WALSHiddenSize_Both = build_mlp2walshiddensize_both( model_opt, FTInfos) print('Embeddings for WALS features and MLP models are built!') model = CombineWalsSourceWords(model_opt.wals_model, encoder, decoder, MLP2WALSHiddenSize_Both, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: concatenates WALS features from the source and target languages to source words embeddings." ) elif model_opt.wals_model == 'WalstoTarget_Target': MLP2WALSHiddenSize_Target = build_mlp2walshiddensize_target( model_opt, FTInfos) print('Embeddings for WALS features and MLP models are built!') model = CombineWalsTargetWords(model_opt.wals_model, encoder, decoder, MLP2WALSHiddenSize_Target, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: concatenates WALS features from the target language to target words embeddings." ) elif model_opt.wals_model == 'WalstoTarget_Both': MLP2WALSHiddenSize_Both = build_mlp2walshiddensize_both( model_opt, FTInfos) print('Embeddings for WALS features and MLP models are built!') model = CombineWalsTargetWords(model_opt.wals_model, encoder, decoder, MLP2WALSHiddenSize_Both, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: concatenates WALS features from the source and target languages to target words embeddings." 
) elif model_opt.wals_model == 'WalsDoublyAttentive_Target': MLPFeatureTypes = nn.ModuleDict(mlp_dic_keys) MLP_AttentionTarget = build_doublyattentive_target(model_opt) print('Embeddings for WALS features and MLP models are built!') model = WalsDoublyAttention(model_opt.wals_model, encoder, decoder, MLP_AttentionTarget, MLPFeatureTypes, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: the WALS features from the target language are incorporated as an additional attention mechanism." ) elif model_opt.wals_model == 'WalsDoublyAttentive_Both': MLPFeatureTypes = nn.ModuleDict(mlp_dic_keys) MLP_AttentionBoth = build_doublyattentive_both(model_opt) print('Embeddings for WALS features and MLP models are built!') model = WalsDoublyAttention(model_opt.wals_model, encoder, decoder, MLP_AttentionBoth, MLPFeatureTypes, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: the WALS features from the source and target languages are incorporated as an additional attention mechanism." ) elif model_opt.wals_model == 'WalstoDecHidden_Target': MLP2WALSHiddenSize_Target = build_mlp2walshiddensize_target( model_opt, FTInfos) print('Embeddings for WALS features and MLP models are built!') model = WalstoDecHidden(model_opt.wals_model, encoder, decoder, MLP2WALSHiddenSize_Target, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: concatenates WALS features from the target language to decoder hidden state." ) elif model_opt.wals_model == 'WalstoDecHidden_Both': MLP2WALSHiddenSize_Both = build_mlp2walshiddensize_both( model_opt, FTInfos) print('Embeddings for WALS features and MLP models are built!') model = WalstoDecHidden(model_opt.wals_model, encoder, decoder, MLP2WALSHiddenSize_Both, EmbeddingFeatures, FeatureValues, FeatureTypes, SimulationLanguages, model_opt) print( "Model created: concatenates WALS features from the source and target languages to decoder hidden state." ) else: raise Exception("WALS model type not yet implemented: %s" % (opt.wals_model)) model.model_type = model_opt.model_type # Build Generator. if not model_opt.copy_attn: if model_opt.generator_function == "sparsemax": gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1) else: gen_func = nn.LogSoftmax(dim=-1) generator = nn.Sequential( nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)), gen_func) if model_opt.share_decoder_embeddings: generator[0].weight = decoder.embeddings.word_lut.weight else: generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab) # Load the model states from checkpoint or initialize them. 
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
def build_lm_bias_base_model(model_opt, fields, gpu, checkpoint=None, lm_out_checkpoint=None, lm_in_checkpoint=None): """ Args: model_opt: the option loaded from checkpoint. fields: `Field` objects for the model. gpu(bool): whether to use gpu. checkpoint: the model gnerated by train phase, or a resumed snapshot model from a stopped training. Returns: the NMTModel. """ assert model_opt.model_type in ["text"], \ ("Unsupported model type %s" % (model_opt.model_type)) # Build encoder. if model_opt.model_type == "text": src_dict = fields["src"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'src') src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) lm_out_src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) lm_in_src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts) encoder = build_encoder(model_opt, src_embeddings) lm_out_encoder = build_encoder(model_opt, lm_out_src_embeddings, "lm") lm_in_encoder = build_encoder(model_opt, lm_in_src_embeddings, "lm") # Build decoder. tgt_dict = fields["tgt"].vocab feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt') tgt_embeddings = build_embeddings(model_opt, tgt_dict, feature_dicts, for_encoder=False) lm_out_tgt_embeddings = build_embeddings(model_opt, tgt_dict, feature_dicts, for_encoder=False) lm_in_tgt_embeddings = build_embeddings(model_opt, tgt_dict, feature_dicts, for_encoder=False) # Share the embedding matrix - preprocess with share_vocab required. if model_opt.share_embeddings: # src/tgt vocab should be the same if `-share_vocab` is specified. if src_dict != tgt_dict: raise AssertionError('The `-share_vocab` should be set during ' 'preprocess if you use share_embeddings!') tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight decoder = build_decoder(model_opt, tgt_embeddings) lm_out_decoder = build_decoder(model_opt, lm_out_tgt_embeddings, "lm") lm_in_decoder = build_decoder(model_opt, lm_in_tgt_embeddings, "lm") # Build NMTModel(= encoder + decoder). device = torch.device("cuda" if gpu else "cpu") model = onmt.models.NMTModel(encoder, decoder) lm_out_model = onmt.models.LMModel(lm_out_encoder, lm_out_decoder) lm_in_model = onmt.models.LMModel(lm_in_encoder, lm_in_decoder) model.model_type = model_opt.model_type lm_out_model.model_type = model_opt.model_type lm_in_model.model_type = model_opt.model_type # Build Generator. gen_func = nn.LogSoftmax(dim=-1) generator = nn.Sequential( nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)), gen_func) if model_opt.share_decoder_embeddings: generator[0].weight = decoder.embeddings.word_lut.weight lm_out_generator = nn.Sequential( nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)), gen_func) lm_in_generator = nn.Sequential( nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)), gen_func) # Load the model states from checkpoint or initialize them. 
    assert checkpoint is not None
    load_model_dict = {k: checkpoint['model'][k] for k in checkpoint['model']}
    model_dict = model.state_dict()
    model_dict.update(load_model_dict)
    model.load_state_dict(model_dict)
    generator.load_state_dict(checkpoint['generator'])

    assert lm_out_checkpoint['model'] is not None \
        and lm_in_checkpoint['model'] is not None
    load_model_dict = {
        k: lm_out_checkpoint['model'][k] for k in lm_out_checkpoint['model']
    }
    model_dict = lm_out_model.state_dict()
    model_dict.update(load_model_dict)
    lm_out_model.load_state_dict(model_dict)
    lm_out_generator.load_state_dict(lm_out_checkpoint['generator'])

    load_model_dict = {
        k: lm_in_checkpoint['model'][k] for k in lm_in_checkpoint['model']
    }
    model_dict = lm_in_model.state_dict()
    model_dict.update(load_model_dict)
    lm_in_model.load_state_dict(model_dict)
    lm_in_generator.load_state_dict(lm_in_checkpoint['generator'])

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.lm_out = lm_out_model
    model.lm_in = lm_in_model
    model.lm_out.generator = lm_out_generator
    model.lm_in.generator = lm_in_generator

    # Freeze the pretrained language models: only the main model is trainable.
    for param in model.lm_out.parameters():
        param.requires_grad = False
    for param in model.lm_in.parameters():
        param.requires_grad = False

    model.to(device)

    return model
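# Illustrative usage sketch (not part of the original module): this builder
# requires a main checkpoint plus two pretrained language-model checkpoints.
# The file names are placeholders, and `load_fields_from_vocab` /
# checkpoint keys ('opt', 'vocab') follow the OpenNMT-py conventions assumed
# elsewhere in this file.
#
#     import torch
#
#     checkpoint = torch.load('nmt_model.pt',
#                             map_location=lambda storage, loc: storage)
#     lm_out_ckpt = torch.load('lm_out.pt',
#                              map_location=lambda storage, loc: storage)
#     lm_in_ckpt = torch.load('lm_in.pt',
#                             map_location=lambda storage, loc: storage)
#     model_opt = checkpoint['opt']
#     fields = inputters.load_fields_from_vocab(checkpoint['vocab'])
#     model = build_lm_bias_base_model(model_opt, fields,
#                                      gpu=torch.cuda.is_available(),
#                                      checkpoint=checkpoint,
#                                      lm_out_checkpoint=lm_out_ckpt,
#                                      lm_in_checkpoint=lm_in_ckpt)
#     # The two language models end up attached as model.lm_out / model.lm_in
#     # with requires_grad disabled.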
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio", "vector"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    use_src_directly_for_dec = False

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        if "image_channel_size" not in model_opt.__dict__:
            image_channel_size = 3
        else:
            image_channel_size = model_opt.image_channel_size
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.enc_rnn_size,
                               model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.rnn_type,
                               model_opt.enc_layers,
                               model_opt.dec_layers,
                               model_opt.brnn,
                               model_opt.enc_rnn_size,
                               model_opt.dec_rnn_size,
                               model_opt.audio_enc_pooling,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)
    elif model_opt.model_type == "vector":
        use_src_directly_for_dec = True
        if not hasattr(fields["src"], 'vocab'):
            fields["src"].vocab = fields["tgt"].vocab
        src_dict = fields["src"].vocab
        # self.word_lut.weight.requires_grad = False
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        tgt_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        if model_opt.encoder_type == "rnn" or model_opt.encoder_type == "brnn":
            encoder = RNNEncoder(model_opt.rnn_type, model_opt.brnn,
                                 model_opt.enc_layers,
                                 model_opt.enc_rnn_size,
                                 model_opt.dropout, None,
                                 model_opt.bridge)
            tgt_embeddings = None
        elif model_opt.decoder_type == "cnn":
            use_src_directly_for_dec = False
            encoder = CNNEncoder(model_opt.enc_layers, model_opt.enc_rnn_size,
                                 model_opt.cnn_kernel_width,
                                 model_opt.dropout, None)
            tgt_embeddings = None
        else:
            encoder = None

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    if model_opt.model_type != "vector":
        tgt_embeddings = build_embeddings(model_opt, tgt_dict,
                                          feature_dicts, for_encoder=False)
    # else:
    #     tgt_embeddings = None

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    if model_opt.decoder_type.startswith("vecdif"):
        model = onmt.models.VecModel(
            encoder, decoder,
            use_src_directly_for_dec=use_src_directly_for_dec)
    else:
        model = onmt.models.NMTModel(
            encoder, decoder,
            use_src_directly_for_dec=use_src_directly_for_dec)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        elif model_opt.generator_function == "sigmoid":
            gen_func = nn.Sigmoid()
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        if model_opt.model_type == "vector":
            if model_opt.generator_function == "none":
                # if model_opt.final_vec_size != model_opt.dec_rnn_size:
                #     generator = nn.Sequential(
                #         nn.Linear(model_opt.dec_rnn_size,
                #                   model_opt.final_vec_size))
                # else:
                generator = None
            else:
                generator = nn.Sequential(
                    nn.Linear(model_opt.dec_rnn_size,
                              model_opt.final_vec_size),
                    gen_func)
        else:
            generator = nn.Sequential(
                nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
                gen_func)
            if model_opt.share_decoder_embeddings:
                generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'], strict=False)
        if generator is not None:
            generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            if generator is not None:
                for p in generator.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            if generator is not None:
                for p in generator.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings') \
                and model_opt.model_type != "vector":
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings') \
                and model_opt.model_type != "vector":
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
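# Illustrative sketch (not part of the original module): `strict=False` above
# lets a checkpoint with missing or extra parameters still be loaded; PyTorch
# reports what it skipped instead of raising.  A minimal demonstration with
# two differently shaped modules, assuming only standard PyTorch:
#
#     import torch.nn as nn
#
#     src = nn.Sequential(nn.Linear(4, 4))                   # keys: 0.weight, 0.bias
#     dst = nn.Sequential(nn.Linear(4, 4), nn.Linear(4, 2))  # extra layer "1"
#     result = dst.load_state_dict(src.state_dict(), strict=False)
#     print(result.missing_keys)     # parameters of the extra layer
#     print(result.unexpected_keys)  # empty in this example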
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size
        if model_opt.model_type == 'text' and \
                model_opt.enc_rnn_size != model_opt.dec_rnn_size:
            raise AssertionError("""We do not support different encoder and
                                 decoder rnn sizes for translation now.""")

    # Build encoder.
    logger.info('Building encoder......')
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        if "image_channel_size" not in model_opt.__dict__:
            image_channel_size = 3
        else:
            image_channel_size = model_opt.image_channel_size
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.enc_rnn_size,
                               model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.rnn_type,
                               model_opt.enc_layers,
                               model_opt.dec_layers,
                               model_opt.brnn,
                               model_opt.enc_rnn_size,
                               model_opt.dec_rnn_size,
                               model_opt.audio_enc_pooling,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Build decoder.
    logger.info('Building decoder......')
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt, tgt_dict,
                                      feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        logger.info('** Sharing word embedding matrix between src/tgt')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight
    elif src_dict == tgt_dict:
        logger.info('WARNING: NOT SHARING WORD EMBEDDINGS FOR TIED VOCAB???')
        exit(0)

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    logger.info('Building NMTModel......')
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    logger.info('Building generator......')
    # (standard generator)
    # Given the final hidden state (after attention) h_t at the t-th decoding
    # step, return
    #     s_t = log(softmax(W h_t + b)),
    # where W is optionally tied to the decoder word embedding matrix.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
            gen_func
        )
        if model_opt.share_decoder_embeddings:
            logger.info('** Sharing generator softmax with tgt word embedding')
            generator[0].weight = decoder.embeddings.word_lut.weight
        else:
            logger.info('WARNING: NOT SHARING GENERATOR SOFTMAX WITH TGT WORD '
                        'EMBEDDING MATRIX - IS THERE A GOOD REASON?')
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        logger.info('Loading model states from checkpoint......')
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        logger.info('Initializing parameters......')
        if not model_opt.param_init_glorot:
            logger.info('WARNING: NOT USING XAVIER INITIALIZATION? WILL JUST '
                        'USE UNIF(+- %.2f)' % (model_opt.param_init))
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            if model_opt.pre_word_vecs_enc:
                logger.info('** Using pretrained encoder word embeddings')
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            if model_opt.pre_word_vecs_dec:
                logger.info('** Using pretrained decoder word embeddings')
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
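# Illustrative usage sketch (not part of the original module): a typical call
# path for this variant.  Passing a loaded checkpoint resumes from saved
# states; passing checkpoint=None takes the fresh-initialization branch and
# logs the warnings above.  The file name is a placeholder, and the 'opt' /
# 'vocab' keys follow the OpenNMT-py checkpoint convention assumed above.
#
#     import torch
#
#     checkpoint = torch.load('model_step_10000.pt',
#                             map_location=lambda storage, loc: storage)
#     model_opt = checkpoint['opt']
#     fields = inputters.load_fields_from_vocab(checkpoint['vocab'])
#     model = build_base_model(model_opt, fields,
#                              gpu=torch.cuda.is_available(),
#                              checkpoint=checkpoint)
#     model.eval()  # or keep in train mode when resuming training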