Example 1
def build_decoder_and_generator(model_opt, fields):

    # Build decoder.
    tgt_field = fields["tgt"]
    tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    decoder = build_decoder(model_opt, tgt_emb)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size,
                      len(fields["tgt"].base_field.vocab)),
            Cast(torch.float32), gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)

    return decoder, generator, tgt_emb
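
Note: the generator built in the non-copy branch is just a linear projection from the decoder state to the target vocabulary followed by a log-normalization. A minimal sketch of what it computes, with hypothetical sizes standing in for model_opt.dec_rnn_size and the tgt vocabulary (the Cast(torch.float32) wrapper is omitted here):

import torch
import torch.nn as nn

dec_rnn_size, vocab_size = 512, 10000   # hypothetical sizes

generator = nn.Sequential(
    nn.Linear(dec_rnn_size, vocab_size),
    nn.LogSoftmax(dim=-1))

dec_out = torch.randn(4, dec_rnn_size)  # one decoding step for a batch of 4
log_probs = generator(dec_out)
print(log_probs.shape)                  # torch.Size([4, 10000])
print(log_probs.exp().sum(dim=-1))      # each row sums to ~1
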
Example 2
def make_generator(model_opt, decoder, fields, des='tgt'):
    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields[des].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields[des].vocab)
    return generator
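
Both examples tie the generator's projection matrix to the decoder's word embedding table when share_decoder_embeddings is set. A minimal sketch (hypothetical sizes) of why the assignment generator[0].weight = decoder.embeddings.word_lut.weight is shape-compatible: nn.Linear(dim, vocab).weight and nn.Embedding(vocab, dim).weight are both (vocab, dim) tensors, so a single Parameter can serve both roles.

import torch.nn as nn

vocab_size, emb_dim = 10000, 512        # hypothetical sizes
word_lut = nn.Embedding(vocab_size, emb_dim)
proj = nn.Linear(emb_dim, vocab_size)

proj.weight = word_lut.weight           # tie: one shared Parameter
assert proj.weight is word_lut.weight
assert proj.weight.shape == (vocab_size, emb_dim)
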
Example 3
def build_model(opt, dicts, nSets):
    
    opt = update_opt(opt)
    
    encoder = onmt.Models.Encoder(opt, dicts['src'])
    decoder = onmt.Models.Decoder(opt, dicts['tgt'], nSets)
    
    if opt.copy_pointer:
        generator = CopyGenerator(opt, dicts['tgt'])
    else:
        generator = onmt.Models.Generator(opt, dicts['tgt'])
        
    print(generator)
        
    model = onmt.Models.NMTModel(encoder, decoder)
    
    if opt.share_embedding:
        model.shareEmbedding(dicts)
    if opt.share_projection:
        model.shareProjection(generator)
    
    return model, generator
Example 4
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        "Unsupported model type %s" % model_opt.model_type

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size

    # Build embeddings.
    if model_opt.model_type == "text":
        src_fields = [f for n, f in fields['src']]
        assert len(src_fields) == 1
        src_field = src_fields[0]
        src_emb = build_embeddings(model_opt, src_field)
    else:
        src_emb = None

    # Build encoder.
    encoder = build_encoder(model_opt, src_emb)

    # Build decoder.
    tgt_fields = [f for n, f in fields['tgt']]
    assert len(tgt_fields) == 1
    tgt_field = tgt_fields[0]
    tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"

        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    decoder = build_decoder(model_opt, tgt_emb)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size,
                      len(fields["tgt"][0][1].base_field.vocab)), gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        assert len(fields["tgt"]) == 1
        tgt_base_field = fields["tgt"][0][1].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using custom layernorm
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {
            fix_key(k): v
            for k, v in checkpoint['model'].items()
        }
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            if model_opt.bert:
                for p in model.decoder.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
            else:
                for p in model.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            if model_opt.bert:
                for p in model.decoder.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)
            else:
                for p in model.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
        if model_opt.pre_word_vecs_enc is not None:
            if hasattr(model.encoder, 'embeddings'):
                model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc)
        if model_opt.pre_word_vecs_dec is not None:
            if hasattr(model.decoder, 'embeddings'):
                model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec)

    model.generator = generator
    model.to(device)

    return model
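
The fix_key patch above renames legacy layer-norm parameter names (a_2/b_2) to the weight/bias names that nn.LayerNorm uses, before load_state_dict is called. A small standalone sketch of the same regexes applied to made-up checkpoint keys:

import re

def fix_key(s):
    s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2', r'\1.layer_norm\2.bias', s)
    s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2', r'\1.layer_norm\2.weight', s)
    return s

print(fix_key('decoder.transformer_layers.0.layer_norm_1.a_2'))
# -> decoder.transformer_layers.0.layer_norm_1.weight
print(fix_key('encoder.layer_norm.b_2'))
# -> encoder.layer_norm.bias
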
Example 5
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder_type == "trigramrnn" and model_opt.decoder_type == "rnn":
        decoder = make_decoder(model_opt, tgt_embeddings)
        model = NMTSourceTrigramModel(encoder, decoder)
    elif model_opt.encoder_type == "brnn" and model_opt.decoder_type == "charrnn":
        [decoder1, decoder2] = make_decoder(model_opt, tgt_embeddings)
        model = NMTTargetCharModel(encoder, decoder1, decoder2)
    elif model_opt.encoder_type == "trigramrnn" and model_opt.decoder_type == "charrnn":
        [decoder1, decoder2] = make_decoder(model_opt, tgt_embeddings)
        model = CharNMTModel(encoder, decoder1, decoder2)
    else:
        decoder = make_decoder(model_opt, tgt_embeddings)
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            if model_opt.pre_encoder:
                pretrained = torch.load(model_opt.pre_encoder)
                encoder_dict = {}
                for key in pretrained['model']:
                    if key.startswith('encoder'):
                        encoder_dict[key] = pretrained['model'][key]
                model_dict = model.state_dict()
                model_dict.update(encoder_dict)
                model.load_state_dict(model_dict)

                model.encoder.requires_grad = False
                for p in model.encoder.parameters():
                    p.requires_grad = False

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if model_opt.decoder_type == "charrnn":
            if hasattr(model.decoder1, 'embeddings'):
                model.decoder1.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
        else:
            if hasattr(model.decoder, 'embeddings'):
                model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
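
When model_opt.pre_encoder is set, the example above copies only the encoder.* entries from a pretrained checkpoint into the fresh model's state dict and then freezes the encoder. A minimal sketch of that pattern with a hypothetical two-part model:

import torch
import torch.nn as nn

# Hypothetical stand-in for NMTModel(encoder, decoder).
model = nn.ModuleDict({'encoder': nn.Linear(8, 8), 'decoder': nn.Linear(8, 4)})
pretrained = {'model': {'encoder.weight': torch.zeros(8, 8),
                        'encoder.bias': torch.zeros(8)}}

# Keep only encoder.* entries, merge them into the current state dict, reload.
encoder_dict = {k: v for k, v in pretrained['model'].items()
                if k.startswith('encoder')}
model_dict = model.state_dict()
model_dict.update(encoder_dict)
model.load_state_dict(model_dict)

# Freeze the pretrained encoder so only the decoder keeps training.
for p in model['encoder'].parameters():
    p.requires_grad = False
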
Example 6
def build_base_model(model_opt, fields, gpu, checkpoint=None, gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """

    # Build embeddings.
    if model_opt.model_type == "text":
        src_field = fields["src"]
        src_emb = build_embeddings(model_opt, src_field)
    else:
        src_emb = None

    # Build encoder.
    encoder = build_encoder(model_opt, src_emb)

    # Build decoder.
    if not model_opt.domain_cls_enc:
        tgt_field = fields["tgt"]
        tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings and model_opt.encoder_type != 'bert':
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"

        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    if not model_opt.domain_cls_enc:
        decoder = build_decoder(model_opt, tgt_emb)

    # Build NMTModel(= encoder + decoder).
    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu and not gpu_id:
        device = torch.device("cuda")
    elif not gpu:
        device = torch.device("cpu")

    if model_opt.domain_cls_enc:
        model = onmt.models.Domain_CLS_ENC(encoder, model_opt)
    else:
        model = onmt.models.NMTModel(encoder, decoder, tgt_field, model_opt)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)

        if model_opt.user_bias != "none":
            # Multi-task case: user_bias + domain_cls
            generator = nn.Sequential(
                nn.Linear(model_opt.dec_rnn_size,
                          len(fields["tgt"].base_field.vocab)),
                Cast(torch.float32))
            if model_opt.domain_classify:
                dom_classifier = nn.Sequential(
                    nn.Linear(model_opt.dec_rnn_size, model_opt.domain_len),
                    Cast(torch.float32), gen_func)
                model.dom_classifier = dom_classifier

        else:
            # Single-task case: user_bias or user_cls or domain_cls
            generator = nn.Sequential(
                nn.Linear(model_opt.dec_rnn_size,
                          len(fields["tgt"].base_field.vocab)),
                Cast(torch.float32), gen_func)
            if model_opt.user_classify:
                classifier = nn.Sequential(
                    nn.Linear(model_opt.dec_rnn_size, model_opt.user_len),
                    Cast(torch.float32), gen_func)
                model.classifier = classifier
            if model_opt.domain_classify or model_opt.domain_cls_enc:
                dom_classifier = nn.Sequential(
                    nn.Linear(model_opt.dec_rnn_size, model_opt.domain_len),
                    Cast(torch.float32), gen_func)
                model.dom_classifier = dom_classifier

        if model_opt.share_decoder_embeddings and not model_opt.domain_cls_enc:
            if not model_opt.copy_attn:
                generator[0].weight = decoder.embeddings.word_lut.weight
            else:
                generator.linear.weight = decoder.embeddings.word_lut.weight
    else:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using custom layernorm
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {
            fix_key(k): v
            for k, v in checkpoint['model'].items()
        }
        # end of patch for backward compatibility
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    elif model_opt.encoder_type != 'bert' or model_opt.decoder_type != 'bert':
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if (hasattr(model.encoder, 'embeddings')
                and not model_opt.encoder_type == 'bert'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc)
        if not model_opt.domain_cls_enc and (
                hasattr(model.decoder, 'embeddings')
                and not model_opt.decoder_type == 'bert'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)

    if model_opt.encoder_type == 'bert' or model_opt.decoder_type == 'bert':
        if model_opt.bert_type != 'none':
            model_opt.enc_bert_type = model_opt.bert_type
            model_opt.dec_bert_type = model_opt.bert_type

        if model_opt.enc_bert_type != 'none' and checkpoint is None:
            model.encoder.initialize_bert(model_opt.enc_bert_type)

        if model_opt.dec_bert_type != 'none' and checkpoint is None:
            model.decoder.initialize_bert(model_opt.dec_bert_type)

        # Tie word embedding layer of encoder BERT and decoder
        if model_opt.encoder_type == 'bert' and model_opt.share_embeddings:
            decoder.embeddings.word_lut.weight = \
                encoder.embeddings.word_lut.weight

        # Tie decoder word embedding layer with generator weights
        if model_opt.share_decoder_embeddings:
            if not model_opt.copy_attn:
                generator[0].weight = \
                    decoder.embeddings.word_lut.weight
            else:
                generator.linear.weight = \
                    decoder.embeddings.word_lut.weight

    if model_opt.encoder_type == 'bert' and model_opt.decoder_type == 'bert':
        # Tie word, position and token_type embedding
        # layers of encoder and decoder BERT
        if model_opt.share_embeddings:
            decoder.embeddings.position_embeddings.weight = \
                encoder.embeddings.position_embeddings.weight
            decoder.embeddings.token_type_embeddings.weight = \
                encoder.embeddings.token_type_embeddings.weight

        # Tie self-attention between encoder and decoder
        if model_opt.share_self_attn:
            for encoder_layer, decoder_layer in zip(
                    encoder.encoder.layer, decoder.transformer_layers):
                # QUERY
                clone_or_share_layer(decoder_layer.self_attn.linear_query,
                                     encoder_layer.attention.self.query,
                                     share=True)

                # KEY
                clone_or_share_layer(decoder_layer.self_attn.linear_keys,
                                     encoder_layer.attention.self.key,
                                     share=True)

                # VALUE
                clone_or_share_layer(decoder_layer.self_attn.linear_values,
                                     encoder_layer.attention.self.value,
                                     share=True)

                # MULTIHEAD ATTN FINAL LINEAR LAYER
                clone_or_share_layer(decoder_layer.self_attn.final_linear,
                                     encoder_layer.attention.output.dense,
                                     share=True)

        # Tie context-attention with self-attention
        if model_opt.tie_context_attn:
            for decoder_layer in decoder.transformer_layers:
                # QUERY
                clone_or_share_layer(decoder_layer.context_attn.linear_query,
                                     decoder_layer.self_attn.linear_query,
                                     share=True)

                # KEY
                clone_or_share_layer(decoder_layer.context_attn.linear_keys,
                                     decoder_layer.self_attn.linear_keys,
                                     share=True)

                # VALUE
                clone_or_share_layer(decoder_layer.context_attn.linear_values,
                                     decoder_layer.self_attn.linear_values,
                                     share=True)

                # MULTIHEAD ATTN FINAL LINEAR LAYER
                clone_or_share_layer(decoder_layer.context_attn.final_linear,
                                     decoder_layer.self_attn.final_linear,
                                     share=True)

        # Tie positionwise feedforward between encoder and decoder
        if model_opt.share_feed_forward:
            for encoder_layer, decoder_layer in zip(
                    encoder.encoder.layer, decoder.transformer_layers):

                # TRANSFORMER FF
                clone_or_share_layer(decoder_layer.intermediate.dense,
                                     encoder_layer.intermediate.dense,
                                     share=True)

                clone_or_share_layer(decoder_layer.output.dense,
                                     encoder_layer.output.dense,
                                     share=True)

    model.generator = generator
    model.to(device)
    if model_opt.model_dtype == 'fp16':
        model.half()

    return model
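
The BERT-sharing branches above rely on clone_or_share_layer(layer1, layer2, share=True) to make one attention projection reuse another's parameters. The real helper is not shown here; a hypothetical minimal version for two nn.Linear layers might look like this:

import torch.nn as nn

def clone_or_share_layer(layer1, layer2, share=False):
    # Hypothetical sketch: share=True points layer1 at layer2's parameters,
    # share=False gives layer1 an independent copy of them.
    if share:
        layer1.weight, layer1.bias = layer2.weight, layer2.bias
    else:
        layer1.weight = nn.Parameter(layer2.weight.detach().clone())
        layer1.bias = nn.Parameter(layer2.bias.detach().clone())

dec_query = nn.Linear(512, 512)
enc_query = nn.Linear(512, 512)
clone_or_share_layer(dec_query, enc_query, share=True)
assert dec_query.weight is enc_query.weight
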
Example 7

def make_base_model(model_opt, fields, gpu, checkpoint=None, train_part="all"):
    """
	Args:
		model_opt: the option loaded from checkpoint.
		fields: `Field` objects for the model.
		gpu(bool): whether to use gpu.
		checkpoint: the model gnerated by train phase, or a resumed snapshot
					model from a stopped training.
	Returns:
		the NMTModel.
	"""
    assert model_opt.model_type in ["text", "img", "audio"], \
     ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)
    context = make_context(model_opt, tgt_dict)

    # Make NMTModel(= encoder + decoder).
    if model_opt.RISK_ratio > 0.0:
        scorer = onmt.translate.GNMTGlobalScorer(model_opt.alpha,
                                                 model_opt.beta,
                                                 model_opt.coverage_penalty,
                                                 model_opt.length_penalty)
        model = NMTModel(encoder,
                         decoder,
                         context,
                         context_type=model_opt.context_type,
                         tgt_vocab=fields['tgt'].vocab,
                         beam_size=model_opt.beam_size,
                         n_best=model_opt.n_best,
                         gpu=gpu,
                         scorer=scorer,
                         min_length=model_opt.min_length,
                         max_length=model_opt.max_length,
                         stepwise_penalty=model_opt.stepwise_penalty,
                         block_ngram_repeat=model_opt.block_ngram_repeat,
                         ignore_when_blocking=model_opt.ignore_when_blocking,
                         copy_attn=model_opt.copy_attn,
                         context_size=model_opt.context_size)
    else:
        model = NMTModel(encoder,
                         decoder,
                         context,
                         context_type=model_opt.context_type)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model_dict = checkpoint['model']
        if train_part == "context":
            model_dict = model.state_dict()
            if 'join' in model_opt.context_type:
                pretrained_dict = {}
                for k, v in checkpoint['model'].items():
                    if k in model_dict:
                        if 'doc_context' in k:
                            k = k.replace('doc_context', 'doc_context.0')
                        pretrained_dict[k] = v
            else:
                pretrained_dict = {
                    k: v
                    for k, v in checkpoint['model'].items()
                    if k in model_dict and 'doc_context' not in k
                }
            model_dict.update(pretrained_dict)

        model.load_state_dict(model_dict, strict=False)
        generator.load_state_dict(checkpoint['generator'])
        if train_part == "context":
            print("Freezing parameters of main model")
            for param in model.parameters():
                param.requires_grad = False
            for param in generator.parameters():
                param.requires_grad = False
            print("Unfreezing parameters of context")
            for param in model.doc_context.parameters():
                param.requires_grad = True
                if model_opt.param_init != 0.0:
                    param.data.uniform_(-model_opt.param_init,
                                        model_opt.param_init)
                if model_opt.param_init_glorot:
                    if param.dim() > 1:
                        xavier_uniform(param)
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example 8
def build_end2end_model(model_opt,
                        fields,
                        gpu,
                        checkpoint=None,
                        sel_checkpoint=None,
                        s2s_gen_checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        sel_checkpoint: the model generated by selector pre-train phase.
    Returns:
        the E2EModel.
    """
    assert model_opt.model_type in ["text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Build selector.
    src_dict = fields["src"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
    sel_src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
    selector = build_selector(model_opt, sel_src_embeddings)

    # Build encoder
    if model_opt.e2e_type == "separate_enc_sel":
        if model_opt.selector_share_embeddings:
            # the shared embeddings are in the encoder.embeddings
            # TODO: change the state name to load the embeddings in the pretrained selector embeddings
            assert model_opt.load_pretrained_selector_from == ''
            src_embeddings = build_embeddings(model_opt, src_dict,
                                              feature_dicts)
            src_embeddings.word_lut.weight = sel_src_embeddings.word_lut.weight
        else:
            src_embeddings = build_embeddings(model_opt, src_dict,
                                              feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    else:
        # model_opt.e2e_type == "share_enc_sel"
        src_embeddings = sel_src_embeddings
        encoder = None

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build E2EModel(= encoder + selector + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.E2EModel(encoder,
                                 selector,
                                 decoder,
                                 e2e_type=model_opt.e2e_type,
                                 use_gt_sel_probs=model_opt.use_gt_sel_probs)
    model.model_type = model_opt.model_type

    # Build Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['end2end_model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if sel_checkpoint is not None:
            model.load_state_dict(sel_checkpoint['selector'], strict=False)

        if s2s_gen_checkpoint is not None:
            model.load_state_dict(s2s_gen_checkpoint['model'], strict=False)
            generator.load_state_dict(s2s_gen_checkpoint['generator'])

        # if hasattr(model.encoder, 'embeddings'):
        #     model.encoder.embeddings.load_pretrained_vectors(
        #         model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        # if hasattr(model.decoder, 'embeddings'):
        #     model.decoder.embeddings.load_pretrained_vectors(
        #         model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example 9
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
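
Several of these builders initialize fresh parameters either uniformly in [-param_init, param_init] or with Glorot/Xavier initialization for matrices only. A short sketch of both schemes on a throwaway module (hypothetical sizes):

import torch.nn as nn
from torch.nn.init import xavier_uniform_

model = nn.Sequential(nn.Linear(16, 16), nn.Tanh(), nn.Linear(16, 4))
param_init = 0.1

# Uniform init over every parameter (weights and biases alike).
for p in model.parameters():
    p.data.uniform_(-param_init, param_init)

# Glorot init only touches matrices; 1-D parameters (biases) are left as-is.
for p in model.parameters():
    if p.dim() > 1:
        xavier_uniform_(p)
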
Example 10

def build_base_model(model_opt, fields, gpu, checkpoint=None, gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """

    # for back compat when attention_dropout was not defined
    try:
        model_opt.attention_dropout
    except AttributeError:
        model_opt.attention_dropout = model_opt.dropout

    # Build embeddings.
    src_field = fields["src"]
    src_emb = build_embeddings(model_opt, src_field)
    tgt_field = fields["tgt"]
    tgt_emb = build_embeddings(model_opt, tgt_field)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"

        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    # Build encoder.
    encoder_x2y = build_encoder(model_opt, src_emb)
    encoder_y2x = build_encoder(model_opt, tgt_emb)

    # Build decoder.
    decoder_x2y = build_decoder(model_opt, tgt_emb)
    decoder_y2x = build_decoder(model_opt, src_emb)


    def share_attn_weight_and_bias(attn1, attn2, 
                                   share_relative_pos_embeddings=False):
        attn2.linear_keys = attn1.linear_keys
        attn2.linear_values = attn1.linear_values
        attn2.linear_query = attn1.linear_query
        attn2.final_linear = attn1.final_linear
        if share_relative_pos_embeddings:
            assert model_opt.max_relative_positions > 0
            attn2.relative_positions_embeddings = \
                attn1.relative_positions_embeddings

    # logger.info('share encoder')
    encoder_y2x = encoder_x2y
    # logger.info('share cross_attns btw fwd & bwd decoders')
    for dec1, dec2 in zip(decoder_x2y.transformer_layers, 
                            decoder_y2x.transformer_layers):
        share_attn_weight_and_bias(dec1.context_attn, dec2.context_attn)

    # logger.info('share self_attns btw fwd & bwd decoders')
    for dec1, dec2 in zip(decoder_x2y.transformer_layers, 
                            decoder_y2x.transformer_layers):
        share_attn_weight_and_bias(dec1.self_attn, dec2.self_attn,
                                    model_opt.share_relative_pos_embeddings)
    # logger.info('share feed_forwards btw fwd & bwd decoders')
    for dec1, dec2 in zip(decoder_x2y.transformer_layers, 
                            decoder_y2x.transformer_layers):
        dec2.feed_forward.w_1 = dec1.feed_forward.w_1
        dec2.feed_forward.w_2 = dec1.feed_forward.w_2

    # Build NMTModel(= encoder + decoder).
    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu and not gpu_id:
        device = torch.device("cuda")
    elif not gpu:
        device = torch.device("cpu")
    model = onmt.models.NMTModel(encoder_x2y, encoder_y2x, 
                                 decoder_x2y, decoder_y2x)

    # Build prior modeling
    prior = None
    if model_opt.learned_prior:
        assert model_opt.num_experts > 1
        prior = onmt.models.Classifier(
            model_opt.enc_rnn_size, model_opt.num_experts, 
            dropout=(model_opt.dropout[0] if type(model_opt.dropout) is list
                     else model_opt.dropout))

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator_x2y = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size,
                      len(fields["tgt"].base_field.vocab)),
            Cast(torch.float32),
            gen_func
        )
        generator_y2x = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size,
                      len(fields["src"].base_field.vocab)),
            Cast(torch.float32),
            gen_func
        )
        if model_opt.share_decoder_embeddings:
            generator_x2y[0].weight = decoder_x2y.embeddings.word_lut.weight
            generator_y2x[0].weight = decoder_y2x.embeddings.word_lut.weight
    else:
        # Build forward (x2y) and backward (y2x) copy generators over the
        # tgt and src vocabularies, so both names used below are defined.
        tgt_base_field = fields["tgt"].base_field
        tgt_pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator_x2y = CopyGenerator(model_opt.dec_rnn_size,
                                      len(tgt_base_field.vocab), tgt_pad_idx)
        src_base_field = fields["src"].base_field
        src_pad_idx = src_base_field.vocab.stoi[src_base_field.pad_token]
        generator_y2x = CopyGenerator(model_opt.dec_rnn_size,
                                      len(src_base_field.vocab), src_pad_idx)
        if model_opt.share_decoder_embeddings:
            generator_x2y.linear.weight = decoder_x2y.embeddings.word_lut.weight
            generator_y2x.linear.weight = decoder_y2x.embeddings.word_lut.weight

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using custom layernorm
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {fix_key(k): v
                               for k, v in checkpoint['model'].items()}
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator_x2y.load_state_dict(checkpoint['generator_x2y'], strict=False)
        generator_y2x.load_state_dict(checkpoint['generator_y2x'], strict=False)
        if model_opt.learned_prior:
            prior.load_state_dict(checkpoint['prior'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            def init_param(target_model):
                for p in target_model.parameters():
                    p.data.uniform_(-model_opt.param_init, 
                                    model_opt.param_init)
            init_param(model)
            init_param(generator_x2y)
            init_param(generator_y2x)
            if model_opt.learned_prior:
                init_param(prior)
        if model_opt.param_init_glorot:
            def init_glorot(target_model):
                for p in target_model.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)
            init_glorot(model)
            init_glorot(generator_x2y)
            init_glorot(generator_y2x)
            if model_opt.learned_prior:
                init_glorot(prior)

    model.generator_x2y = generator_x2y
    model.generator_y2x = generator_y2x
    model.prior = prior
    model.to(device)
    if model_opt.model_dtype == 'fp16' and model_opt.optim == 'fusedadam':
        model.half()
    return model
Example 11
def make_latent_variable_LSTM(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """

    # Make encoder.
    src_dict = fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')

    # Seq2seq encoder
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    encoder = make_encoder(model_opt, src_embeddings)

    # Latent variable-approximate distribution-mu/distribution-logvar
    src_embeddings_approx = make_embeddings(model_opt,
                                            src_dict,
                                            feature_dicts,
                                            for_vae=True)
    enc_approx = make_encoder(model_opt, src_embeddings_approx, for_vae=True)
    approx_mu = nn.Linear(model_opt.rnn_size_vae, model_opt.size_vae)
    approx_logvar = nn.Linear(model_opt.rnn_size_vae, model_opt.size_vae)

    # Latent variable-true posterior-mu/posterior-logvar
    src_embeddings_true = make_embeddings(model_opt,
                                          src_dict,
                                          feature_dicts,
                                          for_vae=True)
    enc_true = make_encoder(model_opt, src_embeddings_true, for_vae=True)
    true_mu = nn.Linear(model_opt.rnn_size_vae, model_opt.size_vae)
    true_logvar = nn.Linear(model_opt.rnn_size_vae, model_opt.size_vae)

    # For AVE-GlobalMemory
    glb = nn.Linear(model_opt.rnn_size,
                    model_opt.size_vae + model_opt.rnn_size + model_opt.size_c)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Control variable
    glv = GloVe_Discriminator(gpu)
    glv_model = glv.load_model(model_opt.glove_dir)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = LatentVaraibleModel(encoder, decoder, tgt_dict, enc_approx,
                                approx_mu, approx_logvar, enc_true, true_mu,
                                true_logvar, glb, glv, model_opt.max_gen_len,
                                gpu)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(
                model_opt.rnn_size + model_opt.size_vae + model_opt.size_c,
                len(fields["tgt"].vocab)), nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(
            model_opt.rnn_size + model_opt.size_vae + model_opt.size_c,
            fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example 12
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type == "text", ("Unsupported model type %s" %
                                            (model_opt.model_type))

    # Build encoder.
    logger.info("build_base_model")
    if model_opt.model_type == "text":
        # torchtext.vocab.Vocab object: dict_keys(['vectors', 'stoi', 'freqs', 'itos'])
        src_dict = fields["src"].vocab

        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')  # list: []
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        # src_embeddings, e.g.:
        # Embeddings(
        #   (make_embedding): Sequential(
        #     (emb_luts): Elementwise(
        #       (0): Embedding(24997, 500, padding_idx=1)
        #     )
        #   )
        # )
        #logger.info("src embeddings")
        #logger.info(src_embeddings)
        logger.info("bulding question encoder")
        encoder = build_encoder(model_opt, src_embeddings)
        logger.info(encoder)

        ############### Modified ###############################

        ans_dict = fields["ans"].vocab
        ans_embeddings = build_embeddings(model_opt, ans_dict, feature_dicts)
        logger.info("building answer encoder")
        encoder_ans = build_encoder(model_opt, ans_embeddings)
        logger.info(encoder_ans)
        ##########################################################

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = NMTModel(encoder, encoder_ans, decoder)

    model.model_type = model_opt.model_type

    # Build Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size * 2, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example 13
def make_base_model(model_opt, fields, gpu, checkpoint=None, stage1=True):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    if stage1:
        src = "src1"
        tgt = "tgt1"
    else:
        src = "src2"
        tgt = "tgt2"
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields[src].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, src)
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)

        table_embeddings = make_embeddings(model_opt,
                                           src_dict,
                                           feature_dicts,
                                           discard_word=True)
        # reusing the same embedding weights
        print(table_embeddings.make_embedding[0])
        table_embeddings.word_lut.weight = src_embeddings.word_lut.weight
        table_embeddings.field_lut.weight = src_embeddings.field_lut.weight
        table_embeddings.type_lut.weight = src_embeddings.type_lut.weight
        table_embeddings.ha_lut.weight = src_embeddings.ha_lut.weight

        encoder = make_encoder(model_opt, (src_embeddings, table_embeddings),
                               stage1)

    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields[tgt].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, tgt)
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # NOTE: make decoder
    decoder = make_decoder(model_opt, tgt_embeddings, stage1)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if stage1:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt1"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        # NOTE: CopyGenerator
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt2"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
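
The examples initialize freshly created parameters uniformly in [-param_init, param_init]; some additionally apply Xavier/Glorot initialization to weight matrices when param_init_glorot is set. A condensed sketch of that shared pattern (the helper name and default values are ours, not from the examples):

import torch.nn as nn
from torch.nn.init import xavier_uniform_

def init_parameters(module: nn.Module, param_init: float = 0.1, glorot: bool = False):
    """Mirror the initialization above: uniform init for every parameter,
    optionally overridden by Xavier/Glorot for matrices (dim > 1)."""
    for p in module.parameters():
        if param_init != 0.0:
            p.data.uniform_(-param_init, param_init)
        if glorot and p.dim() > 1:
            xavier_uniform_(p)
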
Example no. 14
def build_base_model(model_opt,
                     fields,
                     gpu,
                     FeatureValues,
                     FeatureTensors,
                     FeatureTypes,
                     FeaturesList,
                     FeatureNames,
                     FTInfos,
                     FeatureTypesNames,
                     SimulationLanguages,
                     checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        FeatureValues, FeatureTensors, FeatureTypes, FeaturesList, FeatureNames,
            FTInfos, FeatureTypesNames, SimulationLanguages: WALS feature
            information used to build the WALS-aware model variants.
        checkpoint: the model generated by the training phase, or a resumed
                    snapshot model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        if ("image_channel_size" not in model_opt.__dict__):
            image_channel_size = 3
        else:
            image_channel_size = model_opt.image_channel_size

        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Wals

    print(
        'Building embeddings for each WALS feature and MLP models for each feature type...'
    )

    embeddings_list, embeddings_keys, mlp_list, mlp_keys = [], [], [], []

    for FeatureType in FeatureTypes:

        list_features = FeatureType[1]

        for Feature in list_features:

            globals()['embedding_%s' % Feature] = build_feature_embeddings(
                gpu, FeatureTensors, FeaturesList, FeatureNames,
                Feature)  # 192 embedding structures, one for each feature.
            embeddings_keys.append(Feature)
            embeddings_list.append(globals()['embedding_%s' % Feature])
        globals()['mlp_%s' % FeatureType[0]] = build_mlp_feature_type(
            model_opt, FTInfos, FeatureTypesNames,
            FeatureType[0])  # 11 MLPs, one for each feature type.
        mlp_keys.append(FeatureType[0])
        mlp_list.append(globals()['mlp_%s' % FeatureType[0]])

    embeddings_dic_keys = dict(zip(embeddings_keys, embeddings_list))
    EmbeddingFeatures = nn.ModuleDict(embeddings_dic_keys)

    mlp_dic_keys = dict(zip(mlp_keys, mlp_list))

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")

    if model_opt.wals_model == 'EncInitHidden_Target':

        MLP2RNNHiddenSize_Target = build_mlp2rnnhiddensize_target(
            model_opt, FTInfos)
        print('Embeddings for WALS features and MLP models are built!')
        model = EncoderInitialization(model_opt.wals_model, encoder, decoder,
                                      MLP2RNNHiddenSize_Target,
                                      EmbeddingFeatures, FeatureValues,
                                      FeatureTypes, SimulationLanguages,
                                      model_opt)
        print(
            "Model created: uses WALS features from the target language to initialize encoder's hidden state."
        )

    elif model_opt.wals_model == 'EncInitHidden_Both':

        MLP2RNNHiddenSize_Both = build_mlp2rnnhiddensize_both(
            model_opt, FTInfos)
        print('Embeddings for WALS features and MLP models are built!')
        model = EncoderInitialization(model_opt.wals_model, encoder, decoder,
                                      MLP2RNNHiddenSize_Both,
                                      EmbeddingFeatures, FeatureValues,
                                      FeatureTypes, SimulationLanguages,
                                      model_opt)
        print(
            "Model created: uses WALS features from the source and target languages to initialize encoder's hidden state."
        )

    elif model_opt.wals_model == 'DecInitHidden_Target':

        MLP2RNNHiddenSize_Target = build_mlp2rnnhiddensize_target(
            model_opt, FTInfos)
        print('Embeddings for WALS features and MLP models are built!')
        model = DecoderInitialization(model_opt.wals_model, encoder, decoder,
                                      MLP2RNNHiddenSize_Target,
                                      EmbeddingFeatures, FeatureValues,
                                      FeatureTypes, SimulationLanguages,
                                      model_opt)
        print(
            "Model created: adds WALS features from the target language to the encoder's output to initialize decoder's hidden state."
        )

    elif model_opt.wals_model == 'DecInitHidden_Both':

        MLP2RNNHiddenSize_Both = build_mlp2rnnhiddensize_both(
            model_opt, FTInfos)
        print('Embeddings for WALS features and MLP models are built!')
        model = DecoderInitialization(model_opt.wals_model, encoder, decoder,
                                      MLP2RNNHiddenSize_Both,
                                      EmbeddingFeatures, FeatureValues,
                                      FeatureTypes, SimulationLanguages,
                                      model_opt)
        print(
            "Model created: adds WALS features from the source and target languages to the encoder's output to initialize decoder's hidden state."
        )

    elif model_opt.wals_model == 'WalstoSource_Target':

        MLP2WALSHiddenSize_Target = build_mlp2walshiddensize_target(
            model_opt, FTInfos)
        print('Embeddings for WALS features and MLP models are built!')
        model = CombineWalsSourceWords(model_opt.wals_model, encoder, decoder,
                                       MLP2WALSHiddenSize_Target,
                                       EmbeddingFeatures, FeatureValues,
                                       FeatureTypes, SimulationLanguages,
                                       model_opt)
        print(
            "Model created: concatenates WALS features from the target language to source words embeddings."
        )

    elif model_opt.wals_model == 'WalstoSource_Both':

        MLP2WALSHiddenSize_Both = build_mlp2walshiddensize_both(
            model_opt, FTInfos)
        print('Embeddings for WALS features and MLP models are built!')
        model = CombineWalsSourceWords(model_opt.wals_model, encoder, decoder,
                                       MLP2WALSHiddenSize_Both,
                                       EmbeddingFeatures, FeatureValues,
                                       FeatureTypes, SimulationLanguages,
                                       model_opt)
        print(
            "Model created: concatenates WALS features from the source and target languages to source words embeddings."
        )

    elif model_opt.wals_model == 'WalstoTarget_Target':

        MLP2WALSHiddenSize_Target = build_mlp2walshiddensize_target(
            model_opt, FTInfos)
        print('Embeddings for WALS features and MLP models are built!')
        model = CombineWalsTargetWords(model_opt.wals_model, encoder, decoder,
                                       MLP2WALSHiddenSize_Target,
                                       EmbeddingFeatures, FeatureValues,
                                       FeatureTypes, SimulationLanguages,
                                       model_opt)
        print(
            "Model created: concatenates WALS features from the target language to target words embeddings."
        )

    elif model_opt.wals_model == 'WalstoTarget_Both':

        MLP2WALSHiddenSize_Both = build_mlp2walshiddensize_both(
            model_opt, FTInfos)
        print('Embeddings for WALS features and MLP models are built!')
        model = CombineWalsTargetWords(model_opt.wals_model, encoder, decoder,
                                       MLP2WALSHiddenSize_Both,
                                       EmbeddingFeatures, FeatureValues,
                                       FeatureTypes, SimulationLanguages,
                                       model_opt)
        print(
            "Model created: concatenates WALS features from the source and target languages to target words embeddings."
        )

    elif model_opt.wals_model == 'WalsDoublyAttentive_Target':

        MLPFeatureTypes = nn.ModuleDict(mlp_dic_keys)
        MLP_AttentionTarget = build_doublyattentive_target(model_opt)
        print('Embeddings for WALS features and MLP models are built!')
        model = WalsDoublyAttention(model_opt.wals_model, encoder, decoder,
                                    MLP_AttentionTarget, MLPFeatureTypes,
                                    EmbeddingFeatures, FeatureValues,
                                    FeatureTypes, SimulationLanguages,
                                    model_opt)
        print(
            "Model created: the WALS features from the target language are incorporated as an additional attention mechanism."
        )

    elif model_opt.wals_model == 'WalsDoublyAttentive_Both':

        MLPFeatureTypes = nn.ModuleDict(mlp_dic_keys)
        MLP_AttentionBoth = build_doublyattentive_both(model_opt)
        print('Embeddings for WALS features and MLP models are built!')
        model = WalsDoublyAttention(model_opt.wals_model, encoder, decoder,
                                    MLP_AttentionBoth, MLPFeatureTypes,
                                    EmbeddingFeatures, FeatureValues,
                                    FeatureTypes, SimulationLanguages,
                                    model_opt)
        print(
            "Model created: the WALS features from the source and target languages are incorporated as an additional attention mechanism."
        )

    elif model_opt.wals_model == 'WalstoDecHidden_Target':

        MLP2WALSHiddenSize_Target = build_mlp2walshiddensize_target(
            model_opt, FTInfos)
        print('Embeddings for WALS features and MLP models are built!')
        model = WalstoDecHidden(model_opt.wals_model, encoder, decoder,
                                MLP2WALSHiddenSize_Target, EmbeddingFeatures,
                                FeatureValues, FeatureTypes,
                                SimulationLanguages, model_opt)
        print(
            "Model created: concatenates WALS features from the target language to decoder hidden state."
        )

    elif model_opt.wals_model == 'WalstoDecHidden_Both':

        MLP2WALSHiddenSize_Both = build_mlp2walshiddensize_both(
            model_opt, FTInfos)
        print('Embeddings for WALS features and MLP models are built!')
        model = WalstoDecHidden(model_opt.wals_model, encoder, decoder,
                                MLP2WALSHiddenSize_Both, EmbeddingFeatures,
                                FeatureValues, FeatureTypes,
                                SimulationLanguages, model_opt)
        print(
            "Model created: concatenates WALS features from the source and target languages to decoder hidden state."
        )

    else:
        raise Exception("WALS model type not yet implemented: %s" %
                        (model_opt.wals_model))

    model.model_type = model_opt.model_type

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)), gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
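
The WALS loop in the example above stashes each per-feature embedding and per-type MLP in globals() before zipping them into nn.ModuleDict containers. A sketch of the same bookkeeping with plain dictionaries (the two builder callables stand in for build_feature_embeddings and build_mlp_feature_type; this is a possible refactoring, not the example's code):

import torch.nn as nn

def build_wals_modules(feature_types, build_feature_embedding, build_type_mlp):
    # feature_types is assumed to be a list of (type_name, [feature, ...]) pairs,
    # as iterated by the example above.
    embeddings, mlps = {}, {}
    for type_name, features in feature_types:
        for feature in features:
            embeddings[feature] = build_feature_embedding(feature)
        mlps[type_name] = build_type_mlp(type_name)
    return nn.ModuleDict(embeddings), nn.ModuleDict(mlps)
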
Example no. 15
def build_base_model(model_opt, fields, gpu, checkpoint=None, gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a resumed
                    snapshot model from a stopped training.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """

    # Build embeddings.
    if model_opt.model_type == "text":
        src_field = fields["src"]
        src_emb = build_embeddings(model_opt, src_field)
    else:
        src_emb = None

    # Build encoder.
    encoder = build_encoder(model_opt, src_emb)

    # Build decoder.
    tgt_field = fields["tgt"]
    tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"

        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    if model_opt.share_position_embeddings:
        tgt_emb.make_embedding.pe.pe.weight = src_emb.make_embedding.pe.pe.weight

    decoder = build_decoder(model_opt, tgt_emb)

    # Build NMTModel(= encoder + decoder).
    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu and not gpu_id:
        device = torch.device("cuda")
    elif not gpu:
        device = torch.device("cpu")

    # Build separate LM if doing simple fusion
    if model_opt.simple_fusion:
        layers = 12
        size = 768
        heads = 12

        lm_decoder_opt = copy.deepcopy(model_opt)
        lm_decoder_opt.dec_layers = layers
        lm_decoder_opt.use_GPT_version_ctxattn = False
        lm_decoder_opt.use_GPT_version_psa = False
        lm_decoder_opt.use_GPT_version_unconditional = True
        lm_decoder_opt.tgt_word_vec_size = size
        lm_decoder_opt.rnn_size = size
        lm_decoder_opt.dec_rnn_size = size
        lm_decoder_opt.transformer_ff = size * 4
        lm_decoder_opt.dec_heads = heads
        lm_decoder_opt.position_encoding_learned_dec = True
        lm_decoder_opt.share_decoder_embeddings = True
        lm_decoder_opt.dropout = 0

        lm_decoder_emb = build_embeddings(lm_decoder_opt,
                                          tgt_field,
                                          for_encoder=False)
        logger.info(lm_decoder_emb)

        lm_decoder = build_decoder(lm_decoder_opt, lm_decoder_emb)
        load_decoder = lm_decoder

        model = onmt.models.SimpleFusionModel(encoder, decoder, lm_decoder)

        generator = SimpleFusionGenerator(model_opt.dec_rnn_size,
                                          lm_decoder_opt.dec_rnn_size,
                                          len(fields["tgt"].base_field.vocab))
        generator.lm_linear.weight = lm_decoder.embeddings.word_lut.weight

        if model_opt.share_decoder_embeddings:
            generator.decoder_linear.weight = decoder.embeddings.word_lut.weight
        gen_linear = generator.lm_linear
    else:
        load_decoder = decoder
        if model_opt.unconditional:
            model = onmt.models.UncondModel(decoder)
        else:
            model = onmt.models.NMTModel(encoder, decoder)

        # Build Generator.
        if not model_opt.copy_attn:
            if model_opt.generator_function == "sparsemax":
                gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
            else:
                gen_func = nn.LogSoftmax(dim=-1)

            if model_opt.padded_vocab_fix_me_later:
                gen_func = nn.Sequential(PadGen(), gen_func)

            generator = nn.Sequential(
                nn.Linear(model_opt.dec_rnn_size,
                          len(fields["tgt"].base_field.vocab)),
                Cast(torch.float32), gen_func)
            if model_opt.share_decoder_embeddings:
                generator[0].weight = decoder.embeddings.word_lut.weight
            gen_linear = generator[0]
        else:
            tgt_base_field = fields["tgt"].base_field
            vocab_size = len(tgt_base_field.vocab)
            pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
            generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size,
                                      pad_idx)
            if model_opt.share_decoder_embeddings:
                generator.linear.weight = decoder.embeddings.word_lut.weight
            gen_linear = generator.linear

    if model_opt.encdec_share_params:
        for name, p in decoder.named_parameters():
            if 'ctx' in name or 'context' in name:
                continue
            pointer = encoder
            attrs = name.split('.')
            for attr_name in attrs[:-1]:
                pointer = getattr(pointer, attr_name)

            # pointer now has the encoder version of the parameter parent
            setattr(pointer, attrs[-1], p)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # Normally, just load the model parameters from checkpoint
        if 'gpt2_params' not in checkpoint and 'enc_model' not in checkpoint:
            # This preserves backward compatibility for models using custom layernorm
            def fix_key(s):
                s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                           r'\1.layer_norm\2.bias', s)
                s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                           r'\1.layer_norm\2.weight', s)
                return s

            checkpoint['model'] = {
                fix_key(k): v
                for k, v in checkpoint['model'].items()
            }
            # end of patch for backward compatibility

            # Initialize rest of parameters normally
            if hasattr(model_opt,
                       'load_uncond_from') and model_opt.load_uncond_from:
                for p in decoder.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

                # Always initialize encoder parameters normally
                for p in encoder.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

                if model_opt.ctx_weight_param:
                    for name, p in decoder.named_parameters():
                        if 'ctx_weight' in name:
                            p.data.zero_()
                        if 'ctx_bias' in name:
                            p.data.fill_(-10)

            model.load_state_dict(checkpoint['model'], strict=False)
            generator.load_state_dict(checkpoint['generator'], strict=False)
        else:
            # load the gpt parameters
            if 'gpt2_params' in checkpoint:
                init_something = model_opt.gpt2_init_embanddec or model_opt.simple_fusion or model_opt.gpt2_init_embandenc or model_opt.GPT_representation_mode != 'none'

                if init_something:
                    # Initialize all the weights first
                    if model_opt.gpt2_init_zero:
                        for p in decoder.parameters():
                            p.data.zero_()
                        if model_opt.simple_fusion:
                            generator.decoder_linear.weight.data.zero_()
                            generator.decoder_linear.bias.data.zero_()
                    else:
                        for p in decoder.parameters():
                            if p.dim() > 1:
                                xavier_uniform_(p)

                    # Always initialize encoder parameters normally
                    if encoder is not None:
                        for p in encoder.parameters():
                            if p.dim() > 1:
                                xavier_uniform_(p)
                    for p in generator.parameters():
                        if p.dim() > 1:
                            xavier_uniform_(p)
                    if model_opt.zero_bias_init:
                        gen_linear.bias.data.zero_()

                    if model_opt.ctx_weight_param:
                        for name, p in decoder.named_parameters():
                            if 'ctx_weight' in name:
                                p.data.zero_()
                            if 'ctx_bias' in name:
                                p.data.fill_(-10)
                        gen_linear.bias.data.zero_()

                load_models = []
                if model_opt.GPT_representation_mode != 'none':
                    load_embs = []
                    if model_opt.GPT_representation_loc in ['both', 'src']:
                        load_models.append(src_emb.gpt_model)
                        load_embs.append(src_emb)
                    if model_opt.GPT_representation_loc in ['both', 'tgt']:
                        load_models.append(tgt_emb.gpt_model)
                        load_embs.append(tgt_emb)

                else:
                    if model_opt.gpt2_init_embanddec or model_opt.simple_fusion:
                        load_models = [load_decoder]
                    elif model_opt.gpt2_init_embandenc:
                        load_models = [encoder]

                it_list = list(checkpoint['gpt2_params'])
                for lm_idx, load_model in enumerate(load_models):
                    #print(lm_idx, load_model)
                    for name, array in it_list:
                        name = name[12:]  # skip "transformer."
                        name = name.split('.')

                        assigned = False
                        if name[0] == 'wpe':
                            if model_opt.GPT_representation_mode != 'none':
                                pointer = load_embs[
                                    lm_idx].make_embedding.pe.pe.weight
                            else:
                                pointer = load_model.embeddings.make_embedding.pe.pe.weight

                        elif name[0] == 'wte':
                            if model_opt.GPT_representation_mode != 'none':
                                pointer = [
                                    load_embs[lm_idx].make_embedding.
                                    emb_luts[0].weight, gen_linear.weight
                                ]
                            else:
                                pointer = [
                                    load_model.embeddings.make_embedding.
                                    emb_luts[0].weight
                                ]
                                if not model_opt.nopretrain_decemb:
                                    pointer.append(gen_linear.weight)
                                if model_opt.simple_fusion and model_opt.sf_pretrain_dec_emb:
                                    pointer.append(
                                        decoder.embeddings.make_embedding.
                                        emb_luts[0].weight)

                        elif name[0] == 'ln_f':
                            if name[1] == 'weight':
                                pointer = load_model.layer_norm.weight
                            elif name[1] == 'bias':
                                pointer = load_model.layer_norm.bias
                            else:
                                raise ValueError(
                                    'I am missing something here!')

                        elif name[0] == 'h':
                            layer_num = name[1]
                            pointer = getattr(load_model.transformer_layers,
                                              layer_num)
                            if name[2] == 'attn':
                                assigned = True
                                pointer = pointer.self_attn
                                full_data = torch.from_numpy(array)
                                if name[3] == 'c_attn':
                                    end_size = full_data.shape[-1] // 3
                                    assert full_data.shape[-1] % 3 == 0
                                    if name[4] == 'bias':
                                        if init_something:
                                            pointer.linear_query.bias.data = full_data[:
                                                                                       end_size]
                                            pointer.linear_keys.bias.data = full_data[
                                                end_size:end_size * 2]
                                            pointer.linear_values.bias.data = full_data[
                                                end_size * 2:]
                                        if model_opt.gpt2_params_std > 0:
                                            pointer.linear_query.bias.orig = full_data[:end_size].clone(
                                            )
                                            pointer.linear_keys.bias.orig = full_data[
                                                end_size:end_size * 2].clone()
                                            pointer.linear_values.bias.orig = full_data[
                                                end_size * 2:].clone()
                                    elif name[4] == 'weight':
                                        if init_something:
                                            pointer.linear_query.weight.data = full_data[:, :end_size].t(
                                            ).contiguous()
                                            pointer.linear_keys.weight.data = full_data[:,
                                                                                        end_size:
                                                                                        end_size
                                                                                        *
                                                                                        2].t(
                                                                                        ).contiguous(
                                                                                        )
                                            pointer.linear_values.weight.data = full_data[:,
                                                                                          end_size
                                                                                          *
                                                                                          2:].t(
                                                                                          ).contiguous(
                                                                                          )
                                        if model_opt.gpt2_params_std > 0:
                                            pointer.linear_query.weight.orig = full_data[:, :end_size].t(
                                            ).contiguous().clone()
                                            pointer.linear_keys.weight.orig = full_data[:,
                                                                                        end_size:
                                                                                        end_size
                                                                                        *
                                                                                        2].t(
                                                                                        ).contiguous(
                                                                                        ).clone(
                                                                                        )
                                            pointer.linear_values.weight.orig = full_data[:, end_size * 2:].t(
                                            ).contiguous().clone()
                                    else:
                                        raise ValueError(
                                            'I am missing something here!')
                                elif name[3] == 'c_proj':
                                    if name[4] == 'bias':
                                        if init_something:
                                            pointer.final_linear.bias.data = full_data
                                        if model_opt.gpt2_params_std > 0:
                                            pointer.final_linear.bias.orig = full_data.clone(
                                            )
                                    elif name[4] == 'weight':
                                        if init_something:
                                            pointer.final_linear.weight.data = full_data.t(
                                            ).contiguous()
                                        if model_opt.gpt2_params_std > 0:
                                            pointer.final_linear.weight.orig = full_data.t(
                                            ).contiguous().clone()

                                    else:
                                        raise ValueError(
                                            'I am missing something here!')

                            elif name[2] == 'ln_1' or name[2] == 'ln_2':
                                num = name[2][3]
                                pointer = getattr(pointer, 'layer_norm_' + num)
                                if name[3] == 'bias':
                                    pointer = pointer.bias
                                elif name[3] == 'weight':
                                    pointer = pointer.weight
                                else:
                                    raise ValueError(
                                        'I am missing something here!')
                            elif name[2] == 'mlp':
                                pointer = pointer.feed_forward
                                pointer = getattr(pointer, name[3])
                                if name[4] == 'bias':
                                    pointer = pointer.bias
                                elif name[4] == 'weight':
                                    pointer = pointer.weight
                                else:
                                    raise ValueError(
                                        'I am missing something here!')
                            else:
                                raise ValueError(
                                    'I am missing something here!')
                        else:
                            raise ValueError('I am missing something here!')

                        if not assigned:
                            # if name[0] == 'wte':
                            #     print(array.shape)
                            #     continue
                            if name[-1] == 'weight':
                                array = array.T

                            if not isinstance(pointer, list):
                                pointer = [pointer]
                            for pointer_i in pointer:
                                target_size = int(math.ceil(
                                    array.shape[0] / 8)) * 8
                                padded_vocab = name[
                                    0] == 'wte' and pointer_i.shape[
                                        0] == target_size
                                padded_vocab = padded_vocab and pointer_i.shape[
                                    1:] == array.shape[1:]
                                try:
                                    assert pointer_i.shape == array.shape or padded_vocab
                                except AssertionError as e:

                                    e.args += (pointer_i.shape, array.shape)
                                    raise
                                if init_something:
                                    print(
                                        "Initialize PyTorch weight {}".format(
                                            name))
                                    if padded_vocab:
                                        pointer_i.data[:array.shape[
                                            0]] = torch.from_numpy(array)
                                    else:
                                        pointer_i.data = torch.from_numpy(
                                            array)
                                if model_opt.gpt2_params_std > 0:
                                    if padded_vocab:
                                        raise NotImplementedError
                                    else:
                                        pointer_i.orig = torch.from_numpy(
                                            array).clone()
                        # name = name[6:]  # skip "model/"
                        # name = name.split('/')

                        # assigned = False
                        # if name[0] == 'wpe':
                        #     if model_opt.GPT_representation_mode != 'none':
                        #         pointer = load_embs[lm_idx].make_embedding.pe.pe.weight
                        #     else:
                        #         pointer = load_model.embeddings.make_embedding.pe.pe.weight

                        # elif name[0] == 'wte':
                        #     if model_opt.GPT_representation_mode != 'none':
                        #         pointer = [load_embs[lm_idx].make_embedding.emb_luts[0].weight, gen_linear.weight]
                        #     else:
                        #         pointer = [load_model.embeddings.make_embedding.emb_luts[0].weight]
                        #         if not model_opt.nopretrain_decemb:
                        #             pointer.append(gen_linear.weight)
                        #         if model_opt.simple_fusion and model_opt.sf_pretrain_dec_emb:
                        #             pointer.append(decoder.embeddings.make_embedding.emb_luts[0].weight)

                        # elif name[0] == 'ln_f':
                        #     if name[1] == 'g':
                        #         pointer = load_model.layer_norm.weight
                        #     elif name[1] == 'b':
                        #         pointer = load_model.layer_norm.bias
                        #     else:
                        #         raise ValueError('I am missing something here!')

                        # elif name[0][0] == 'h':
                        #     layer_num = name[0][1:]
                        #     pointer = getattr(load_model.transformer_layers, layer_num)
                        #     if name[1] == 'attn':
                        #         assigned = True
                        #         pointer = pointer.self_attn
                        #         full_data = torch.from_numpy(array)
                        #         if name[2] == 'c_attn':
                        #             end_size = full_data.shape[-1]//3
                        #             assert full_data.shape[-1] % 3 == 0
                        #             if name[3] == 'b':
                        #                 if init_something:
                        #                     pointer.linear_query.bias.data = full_data[:end_size]
                        #                     pointer.linear_keys.bias.data = full_data[end_size:end_size*2]
                        #                     pointer.linear_values.bias.data = full_data[end_size*2:]
                        #                 if model_opt.gpt2_params_std > 0:
                        #                     pointer.linear_query.bias.orig = full_data[:end_size].clone()
                        #                     pointer.linear_keys.bias.orig = full_data[end_size:end_size*2].clone()
                        #                     pointer.linear_values.bias.orig = full_data[end_size*2:].clone()
                        #             elif name[3] == 'w':
                        #                 if init_something:
                        #                     pointer.linear_query.weight.data = full_data[:, :end_size].t().contiguous()
                        #                     pointer.linear_keys.weight.data = full_data[:, end_size:end_size*2].t().contiguous()
                        #                     pointer.linear_values.weight.data = full_data[:, end_size*2:].t().contiguous()
                        #                 if model_opt.gpt2_params_std > 0:
                        #                     pointer.linear_query.weight.orig = full_data[:, :end_size].t().contiguous().clone()
                        #                     pointer.linear_keys.weight.orig = full_data[:, end_size:end_size*2].t().contiguous().clone()
                        #                     pointer.linear_values.weight.orig = full_data[:, end_size*2:].t().contiguous().clone()
                        #             else:
                        #                 raise ValueError('I am missing something here!')
                        #         elif name[2] == 'c_proj':
                        #             if name[3] == 'b':
                        #                 if init_something:
                        #                     pointer.final_linear.bias.data = full_data
                        #                 if model_opt.gpt2_params_std > 0:
                        #                     pointer.final_linear.bias.orig = full_data.clone()
                        #             elif name[3] == 'w':
                        #                 if init_something:
                        #                     pointer.final_linear.weight.data = full_data.t().contiguous()
                        #                 if model_opt.gpt2_params_std > 0:
                        #                     pointer.final_linear.weight.orig = full_data.t().contiguous().clone()

                        #             else:
                        #                 raise ValueError('I am missing something here!')

                        #     elif name[1] == 'ln_1' or name[1] == 'ln_2':
                        #         num = name[1][3]
                        #         pointer = getattr(pointer, 'layer_norm_'+num)
                        #         if name[2] == 'b':
                        #             pointer = pointer.bias
                        #         elif name[2] == 'g':
                        #             pointer = pointer.weight
                        #         else:
                        #             raise ValueError('I am missing something here!')
                        #     elif name[1] == 'mlp':
                        #         pointer = pointer.feed_forward
                        #         pointer = getattr(pointer, name[2])
                        #         if name[3] == 'b':
                        #             pointer = pointer.bias
                        #         elif name[3] == 'w':
                        #             pointer = pointer.weight
                        #         else:
                        #             raise ValueError('I am missing something here!')
                        #     else:
                        #         raise ValueError('I am missing something here!')
                        # else:
                        #     raise ValueError('I am missing something here!')

                        # if not assigned:
                        #     if name[0] == 'wte':
                        #         print(array.shape)
                        #         continue
                        #     if name[-1] == 'w' or name[-1] == 'g':
                        #         array = array.T

                        #     if not isinstance(pointer, list):
                        #         pointer = [pointer]
                        #     for pointer_i in pointer:
                        #         target_size = int(math.ceil(array.shape[0]/8))*8
                        #         padded_vocab = name[0] == 'wte' and pointer_i.shape[0] == target_size
                        #         padded_vocab = padded_vocab and pointer_i.shape[1:] == array.shape[1:]
                        #         try:
                        #             assert pointer_i.shape == array.shape or padded_vocab
                        #         except AssertionError as e:

                        #             e.args += (pointer_i.shape, array.shape)
                        #             raise
                        #         if init_something:
                        #             print("Initialize PyTorch weight {}".format(name))
                        #             if padded_vocab:
                        #                 pointer_i.data[:array.shape[0]] = torch.from_numpy(array)
                        #             else:
                        #                 pointer_i.data = torch.from_numpy(array)
                        #         if model_opt.gpt2_params_std > 0:
                        #             if padded_vocab:
                        #                 raise NotImplementedError
                        #             else:
                        #                 pointer_i.orig = torch.from_numpy(array).clone()
            if 'enc_model' in checkpoint:
                load_dict = {
                    k[8:]: v
                    for k, v in checkpoint['enc_model'] if 'encoder' in k
                }
                encoder.load_state_dict(load_dict, strict=True)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if not model_opt.unconditional and hasattr(model.encoder, 'embeddings') \
                and model.encoder.embeddings is not None:
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)

    # remove requires_grad from params that are not trained:
    if model_opt.notrain_emb or model_opt.notrain_embanddec:
        if model_opt.position_encoding_learned_enc and model_opt.share_position_embeddings:
            model.encoder.embeddings.make_embedding.pe.pe.weight.requires_grad = False
        if model_opt.share_embeddings:
            model.encoder.embeddings.make_embedding.emb_luts[
                0].weight.requires_grad = False
        model.decoder.embeddings.make_embedding.pe.pe.weight.requires_grad = False
        model.decoder.embeddings.make_embedding.emb_luts[
            0].weight.requires_grad = False
        generator[0].weight.requires_grad = False

    if model_opt.notrain_genbias:
        generator[0].bias.requires_grad = False

    if model_opt.notrain_embanddec:
        for name, p in load_decoder.layer_norm.named_parameters():
            p.requires_grad = False
        for name, p in load_decoder.transformer_layers.named_parameters():
            if 'context' not in name and 'ctx' not in name:  # Takes care of normal and psa versions
                p.requires_grad = False

    if model_opt.onlytrainln:
        for name, p in model.decoder.named_parameters():
            if 'layer_norm' not in name:
                p.requires_grad = False
        for p in generator.parameters():
            p.requires_grad = False

    if model_opt.onlytrainoutp:
        if model_opt.share_decoder_embeddings:
            raise ValueError

        for p in model.decoder.parameters():
            p.requires_grad = False

    if model_opt.simple_fusion:
        for p in lm_decoder.parameters():
            p.requires_grad = False
        for p in generator.lm_linear.parameters():
            p.requires_grad = False

    model.generator = generator
    model.to(device)
    if model_opt.model_dtype == 'fp16':
        model.half()

    for p in model.parameters():
        if hasattr(p, 'orig'):
            p.orig = p.orig.to(device)
            if model_opt.model_dtype == 'fp16':
                p.orig = p.orig.half()

    return model
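
The GPT-2 loading branch in the example above unpacks the fused c_attn parameter into separate query/key/value projections, transposing because the checkpoint stores weights as (input, output). A standalone sketch of that split, with a hypothetical hidden size:

import torch

hidden = 768                                      # hypothetical GPT-2 hidden size
c_attn_weight = torch.randn(hidden, 3 * hidden)   # fused q/k/v weight, as stored
c_attn_bias = torch.randn(3 * hidden)             # fused q/k/v bias

end = c_attn_weight.shape[-1] // 3
assert c_attn_weight.shape[-1] % 3 == 0

q_w = c_attn_weight[:, :end].t().contiguous()          # -> linear_query.weight
k_w = c_attn_weight[:, end:2 * end].t().contiguous()   # -> linear_keys.weight
v_w = c_attn_weight[:, 2 * end:].t().contiguous()      # -> linear_values.weight
q_b, k_b, v_b = c_attn_bias[:end], c_attn_bias[end:2 * end], c_attn_bias[2 * end:]
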
Example no. 16
def build_base_model(model_opt,
                     fields,
                     gpu,
                     length_model,
                     length_penalty_a,
                     length_penalty_b,
                     checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a resumed
                    snapshot model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size
        if model_opt.model_type == 'text' and \
           model_opt.enc_rnn_size != model_opt.dec_rnn_size:
            raise AssertionError("""We do not support different encoder and
                                     decoder rnn sizes for translation now.""")

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        if ("image_channel_size" not in model_opt.__dict__):
            image_channel_size = 3
        else:
            image_channel_size = model_opt.image_channel_size

        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.rnn_type, model_opt.enc_layers,
                               model_opt.dec_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dec_rnn_size,
                               model_opt.audio_enc_pooling, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        # generator = nn.Sequential(
        #     nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
        #     gen_func
        # )

        # MMM
        class tune_out_prob(nn.Module):
            def __init__(self):
                super(tune_out_prob, self).__init__()
                self.t_lens = None
                self.eos_ind = None
                self.batch_max_len = None
                self.word_index = None
                self.tgt_vocab_size = None
                self.validation = False

            def length_model_loss(self, scale, value, a, b):
                #return -(value / scale) ** 2 - scale.log()
                #return -((value / scale) **2)/2 - (2.5066*scale).log()
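                # Active variant: a quadratic penalty -a * (value / scale) ** 2 + b on
                # the gap between the current position and the expected target length;
                # forward() below adds it to the EOS log-probability at each step.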
                return -a * (value / scale)**2 + b  #*abs(scale)
                # return -((value / scale) ** 2)*scale + scale
                #return -(value / scale)*4 + scale

            def forward(self, x):
                y = x.clone()
                #mask = np.ones(x.size())
                # for i in range(self.t_lens.size(-1)):
                #     y[i*self.batch_size + self.t_lens[i], self.eos_ind] = \
                #         y[i * self.batch_size + self.t_lens[i], self.eos_ind].clone() + math.log(0.9)
                if self.training or self.validation:  # training phase
                    y = y.view(self.batch_max_len, -1, self.tgt_vocab_size)
                    # eos_list = [(i * self.batch_max_len + self.t_lens.data.cpu().numpy()[i]) for i in
                    #             range(self.t_lens.size(-1))]
                    # other_list = list(set(list(range(x.size(0)))) - set(eos_list))
                    # y[other_list, self.eos_ind] = -100
                    # y[eos_list, self.eos_ind] = 0
                    for wi in range(self.batch_max_len):
                        delta_p = (self.t_lens - wi - 1).float()
                        delta_p[delta_p < 0] = 0.05 * delta_p[delta_p < 0]
                        scale = (self.t_lens.float()).sqrt() / 2.0
                        penalties = self.length_model_loss(
                            scale, delta_p, length_penalty_a, length_penalty_b)
                        #penalties[penalties > 0] = 0
                        y[wi, :, self.eos_ind] += penalties
                    y = y.view(-1, self.tgt_vocab_size)
                    #mask[eos_list, self.eos_ind] = +2
                    #mask[other_list, self.eos_ind] = -2
                else:  # translation phase
                    if len(x.size()) == 3:  # x of shape [tgt_len, batch_size, vocab] is a full sentence
                        # for i in range(len(self.t_lens)):
                        #     other_list = list(set(list(range(x.size(0)))) - set(list([self.t_lens.data.cpu().numpy()[i]])))
                        #     #mask[other_list, i, self.eos_ind] = -2
                        #     y[other_list, i, self.eos_ind] = -100
                        #     if self.t_lens[i] < x.size(0):
                        #         #mask[self.t_lens[i], i, self.eos_ind] = +2
                        #         y[self.t_lens[i], i, self.eos_ind] = 0
                        pass
                    else:  # x of shape [(batch_size x beam_size) , vocab ] is only for one step
                        beam_size = x.size(0) // self.t_lens.numel()
                        wi = self.word_index
                        delta_p = (self.t_lens - wi - 2).float()
                        delta_p[delta_p < 0] = 0.005 * delta_p[delta_p < 0]
                        delta_p = delta_p.unsqueeze(1).expand(
                            self.t_lens.numel(), beam_size).flatten()
                        scale = (self.t_lens.float()).sqrt() / 2.0
                        scale = scale.unsqueeze(1).expand(
                            self.t_lens.numel(), beam_size).flatten()
                        penalties = self.length_model_loss(
                            scale, delta_p, length_penalty_a, length_penalty_b)
                        #penalties[penalties > 0] = 0
                        y[:, self.eos_ind] += penalties
                        #y[eos_list ^ 1, self.eos_ind] = -100
                return y
                #mask = torch.tensor(mask, dtype=x.dtype).to(device)
                #x= x+mask
                #return x

                # y = x.clone()
                # # 1. since y is the output of log_softmax, apply exponential
                # # to convert it to probabilistic form
                # y = torch.exp(y)
                # # 2. tune probabilities
                # eos_list = [(i * self.batch_max_len + self.t_lens.data.cpu().numpy()[i]) for i in
                #             range(self.t_lens.size(-1))]
                # other_list = list(set(list(range(y.size(0)))) - set(eos_list))
                #
                # z = y.clone()
                # # 2.1. tune probabilities for eos positions
                # z[eos_list, self.eos_ind] = 1
                # z[eos_list, 0:self.eos_ind] = 0
                # z[eos_list, self.eos_ind+1:-1] = 0
                #
                # # 2.2. tune probabilities for non-eos positions
                # p_val = z[other_list, self.eos_ind] / (self.tgt_vocab_size - 1)
                # z[other_list, self.eos_ind] = 0
                # non_eos_inds = list(set(list(range(self.tgt_vocab_size))) - set([self.eos_ind]))
                # for i in range(len(other_list)):
                #     z[other_list[i], non_eos_inds] = y[other_list[i], non_eos_inds] + p_val[i]
                #
                # # 3. convert y back to log-probability form
                # z = torch.log(z)
                # return z

        # MMM
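        # With an explicit length model ('oracle', 'fixed_ratio' or 'lstm'),
        # append tune_out_prob so <eos> scores are adjusted toward the expected
        # target length; otherwise build the standard generator.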
        if length_model == 'oracle' or length_model == 'fixed_ratio' or length_model == 'lstm':
            generator = nn.Sequential(
                nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
                gen_func, tune_out_prob())
        else:
            generator = nn.Sequential(
                nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
                gen_func)
        # generator = nn.Sequential(
        #     nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
        #     gen_func
        # )
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward compatibility for models using a custom layer norm
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = \
            {fix_key(k): v for (k, v) in checkpoint['model'].items()}
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example no. 17
0
def make_base_model(model_opt, mappings, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        mappings: vocabulary mappings for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    # Make encoder.
    src_dict = mappings['src_vocab']
    src_embeddings = make_embeddings(model_opt, src_dict)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make context embedder.
    if model_opt.num_context > 0:
        context_dict = mappings['utterance_vocab']
        context_embeddings = make_embeddings(model_opt, context_dict)
        context_embedder = make_context_embedder(model_opt, context_embeddings)

    # Make kb embedder.
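    # (only built for multibank attention; the lf2lf model leaves it as None)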
    if "multibank" in model_opt.global_attention:
        if model_opt.model == 'lf2lf':
            kb_embedder = None
        else:
            kb_dict = mappings['kb_vocab']
            kb_embeddings = make_embeddings(model_opt, kb_dict)
            kb_embedder = make_context_embedder(model_opt, kb_embeddings, 'kb')

    # Make decoder.
    tgt_dict = mappings['tgt_vocab']
    tgt_embeddings = make_embeddings(model_opt, tgt_dict)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, tgt_dict)

    if "multibank" in model_opt.global_attention:
        model = NegotiationModel(encoder,
                                 decoder,
                                 context_embedder,
                                 kb_embedder,
                                 stateful=model_opt.stateful)
    else:
        model = NMTModel(encoder, decoder, stateful=model_opt.stateful)

    model.model_type = 'text'

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(nn.Linear(model_opt.rnn_size, len(tgt_dict)),
                                  nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, tgt_dict)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)

        wordvec = {'utterance': model_opt.pretrained_wordvec[0]}
        if len(model_opt.pretrained_wordvec) > 1:
            wordvec['kb'] = model_opt.pretrained_wordvec[1]

        def load_wordvec(embeddings, name):
            embeddings.load_pretrained_vectors(
                wordvec[name], model_opt.fix_pretrained_wordvec)

        # Don't need pretrained word vec for LFs
        if model_opt.model not in ('lf2lf',):
            load_wordvec(model.encoder.embeddings, 'utterance')
            if hasattr(model, 'context_embedder'):
                load_wordvec(model.context_embedder.embeddings, 'utterance')
        if hasattr(model, 'kb_embedder') and model.kb_embedder is not None:
            load_wordvec(model.kb_embedder.embeddings, 'kb')

        if model_opt.model == 'seq2seq':
            load_wordvec(model.decoder.embeddings, 'utterance')

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example no. 18
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)

        # An additional encoder type (TransformerEncoderLM) is supported here.
        encoder = build_encoder(model_opt, src_embeddings)

    elif model_opt.model_type == "img":
        if ("image_channel_size" not in model_opt.__dict__):
            image_channel_size = 3
        else:
            image_channel_size = model_opt.image_channel_size

        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.rnn_type, model_opt.enc_layers,
                               model_opt.dec_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dec_rnn_size,
                               model_opt.audio_enc_pooling, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    lm_aux = model_opt.encoder_type == "transformerAuxLTR"
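    # lm_aux enables an auxiliary language-modelling objective on the source side
    # (see the src_generator built below).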

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    # with lm_aux the model also returns outputs for the auxiliary LM objective
    model = onmt.models.NMTModel(encoder, decoder, lm_aux=lm_aux)

    # Build Generator.
    # The generator maps decoder hidden states to vocabulary logits; embedding
    # matrices may be shared between encoder and decoder, and between the decoder
    # embeddings and the generator weights (see share_decoder_embeddings below).
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
            gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab)

    # Build Source Generator
    # not considering copy attention right now
    if lm_aux:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        # The source vocab normally has no <s>/</s> tokens, though with share_vocab it may.
        src_generator = nn.Sequential(
            nn.Linear(model_opt.enc_rnn_size, len(fields["src"].vocab)),
            gen_func)
        # Tying the source generator to the source embeddings means that, with shared
        # embeddings, the encoder- and decoder-side generators end up sharing weights.
        if model_opt.share_decoder_embeddings:
            src_generator[0].weight = src_embeddings.word_lut.weight

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
        # if lm_aux:
        #     src_generator.load_state_dict(checkpoint['src_generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            if lm_aux:
                for p in src_generator.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            if lm_aux:
                for p in src_generator.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    if lm_aux:
        model.src_generator = src_generator
    model.to(device)

    return model
Example no. 19
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = []
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
    logsoftmax = nn.LogSoftmax(dim=-1)
    softmax = nn.Softmax(dim=-1)
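    # The generator above is a bare Linear layer, so (log-)softmax modules are
    # attached to the model for downstream code to normalize its scores.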
    # Add the generator to the model (assigning an nn.Module attribute registers its parameters).
    model.generator = generator
    model.logsoftmax = logsoftmax
    model.softmax = softmax
    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example no. 20
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Build encoder.
    src_dict = fields["src"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
    src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
    encoder = build_encoder(model_opt, src_embeddings)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= [session encoder] + encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")

    if model_opt.experiment == 'session':
        # Build Session Encoder.
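        # Each session signal (item SKU, user log, operator and the src_site_*
        # fields) gets its own embedding; SessionEncoder combines them.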
        item_embeddings = build_embeddings(model_opt,
                                           fields["src_item_sku"].vocab, [],
                                           for_user=True)
        user_log_embeddings = build_embeddings(model_opt,
                                               fields["src_user_log"].vocab,
                                               [],
                                               for_user=True)
        user_op_embeddings = build_embeddings(model_opt,
                                              fields["src_operator"].vocab, [],
                                              for_user=True)
        user_site_cy_embeddings = build_embeddings(model_opt,
                                                   fields["src_site_cy"].vocab,
                                                   [],
                                                   for_user=True)
        user_site_pro_embeddings = build_embeddings(
            model_opt, fields["src_site_pro"].vocab, [], for_user=True)
        user_site_ct_embeddings = build_embeddings(model_opt,
                                                   fields["src_site_ct"].vocab,
                                                   [],
                                                   for_user=True)
        session_encoder = SessionEncoder(item_embeddings, user_log_embeddings,
                                         user_op_embeddings,
                                         user_site_cy_embeddings,
                                         user_site_pro_embeddings,
                                         user_site_ct_embeddings)
    else:
        session_encoder = None

    model = onmt.models.NMTModel(session_encoder, encoder, decoder)
    model.model_type = model_opt.model_type

    # Build Generator. Copy Generator.
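    # This variant always uses copy attention; session_weight and
    # explanation_weight are passed through to the generator.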
    generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab,
                              model_opt.session_weight,
                              model_opt.explanation_weight)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example no. 21
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        src_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         src_feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    tgt_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     tgt_feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make inference network.
    inference_network = make_inference_network(
        model_opt, src_embeddings, tgt_embeddings, src_dict, src_feature_dicts,
        tgt_dict, tgt_feature_dicts
    ) if model_opt.inference_network_type != "none" else None

    if model_opt.prior_normalization == "bnshare":
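        # "bnshare": the decoder attention reuses the inference network's
        # batch-norm layers, so both are normalized with the same statistics.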
        decoder.attn.bn_mu = inference_network.bn_mu
        decoder.attn.bn_std = inference_network.bn_std

    # Make NMTModel(= encoder + decoder + inference network).
    model = (NMTModel(encoder, decoder, None, "none")
             if inference_network is None else ViNMTModel(
                 encoder,
                 decoder,
                 inference_network,
                 dist_type=model_opt.dist_type,
                 use_prior=model_opt.use_prior > 0))
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        #model.load_state_dict(checkpoint['model'])
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example no. 22
0
def make_base_model(model_opt,
                    fields,
                    gpu,
                    checkpoint=None,
                    stage1=True,
                    basic_enc_dec=False):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    if stage1 and not basic_enc_dec:
        assert False
        src = "src1"
        tgt = "tgt1"
    else:
        src = "src2"
        tgt = "tgt2"
    src_hist = "src1_hist" if (basic_enc_dec or stage1) else None
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields[src].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, src)

        src_embeddings = make_embeddings(model_opt,
                                         src_dict,
                                         feature_dicts,
                                         hist_dict=fields[src_hist].vocab,
                                         use_hier_hist=True)

        encoder = make_encoder(model_opt, src_embeddings, stage1,
                               basic_enc_dec)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields[tgt].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, tgt)
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, stage1
                           and not basic_enc_dec, basic_enc_dec)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    generator = CopyGenerator(model_opt.rnn_size, fields["tgt2"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        # print("model load stats ...")
        # new_model_keys = set(model.state_dict().keys())
        # old_model_keys = set(checkpoint['model'].keys())
        # print("missing keys when load...")
        # print(new_model_keys - old_model_keys)
        # print("abundant keys when load...")
        # print(old_model_keys - new_model_keys)

        # print("gen load stats...")
        # new_gen_keys = set(generator.state_dict().keys())
        # old_gen_keys = set(checkpoint['generator'].keys())
        # print("missing keys when load...")
        # print(new_gen_keys - old_gen_keys)
        # print("abundant keys when load...")
        # print(old_gen_keys - new_gen_keys)
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example no. 23
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Build Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    if model_opt.share_embeddings:
        assert model.encoder.embeddings.word_lut.weight \
               is model.decoder.embeddings.word_lut.weight
    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example no. 24
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the snapshot model.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        if model_opt.encoder_type == "double_encoder":
            inter_dict = fields['inter'].vocab
            inter_feature_dicts = ONMTDataset.collect_feature_dicts(fields)
            inter_embeddings = make_embeddings(model_opt,
                                               inter_dict,
                                               inter_feature_dicts,
                                               for_encoder=False,
                                               for_encoder_int=True)
            encoder = make_encoder(model_opt, src_embeddings, inter_embeddings)
        else:
            encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = []
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder_type == "double_encoder":
        model = DoubleEncNMTModel(encoder, decoder)
    else:
        model = NMTModel(encoder, decoder)

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint.
    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Add the generator to the model (assigning an nn.Module attribute registers its parameters).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example no. 25
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the MemModel.
    """
    # Build encoder.
    src_dict = fields["src"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
    src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)

    encoder = build_encoder(model_opt, src_embeddings)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")

    model = onmt.models.MemModel(encoder, decoder)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
            gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    elif model_opt.coref_vocab or model_opt.coref_attn:
        generator = CorefGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab,
                                   fields["coref_tgt"].vocab)
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example no. 26
0
def make_base_model_mmt(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the Multimodal NMT model.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        # encoder = make_encoder(model_opt, src_embeddings)
        if model_opt.multimodal_model_type in ['imgd', 'imge', 'src+img']:
            encoder = make_encoder(model_opt, src_embeddings)
        elif model_opt.multimodal_model_type == 'imgw':
            # model ImgW uses a specific source-language encoder
            encoder = RNNEncoderImageAsWord(model_opt.rnn_type, model_opt.brnn,
                                            model_opt.enc_layers,
                                            model_opt.rnn_size,
                                            model_opt.dropout, src_embeddings)
        else:
            raise Exception("Multi-modal model type not implemented: %s" %
                            model_opt.multimodal_model_type)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    if model_opt.multimodal_model_type == 'src+img':
        # use the local image features "as is": encoder only reshapes them
        encoder_image = make_encoder_image_local_features(model_opt)
    else:
        # transform global image features before using them
        encoder_image = make_encoder_image_global_features(model_opt)

    # Make NMTModel(= encoder + decoder).
    # model = NMTModel(encoder, decoder)
    # model.model_type = model_opt.model_type
    if model_opt.multimodal_model_type == 'imgd':
        model = NMTImgDModel(encoder, decoder, encoder_image)
    elif model_opt.multimodal_model_type == 'imge':
        model = NMTImgEModel(encoder, decoder, encoder_image)
    elif model_opt.multimodal_model_type == 'imgw':
        model = NMTImgWModel(encoder, decoder, encoder_image)
    elif model_opt.multimodal_model_type == 'src+img':
        # using image encoder only to reshape local features
        model = NMTSrcImgModel(encoder, decoder, encoder_image)
    else:
        raise Exception("Multi-modal model type not yet implemented: %s" %
                        (model_opt.multimodal_model_type))

    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example no. 27
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required
    if model_opt.share_embeddings:
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example no. 28
0
def make_base_model(model_opt, src_dict, tgt_dict, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        src_dict, tgt_dict: source and target vocabularies for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    src_embeddings = make_embeddings(model_opt, src_dict, for_encoder=True)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, for_encoder=False)
    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # if model_opt.pre_word_vecs_enc is not None:
    #     print("Loading word vectors for encoder")
    #     pretrained = torch.load(opt.pre_word_vecs_enc)
    #     src_embeddings.word_lut.weight.data.copy_(pretrained)

    # if model_opt.pre_word_vecs_dec is not None:
    #     print("Loading word vectors for encoder")
    #     pretrained = torch.load(opt.pre_word_vecs_dec)
    #     tgt_embeddings.word_lut.weight.data.copy_(pretrained)

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(nn.Linear(model_opt.rnn_size, len(tgt_dict)),
                                  nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, tgt_dict)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
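The `share_decoder_embeddings` assignment in the example above ties the generator's projection matrix to the decoder's embedding table. A hedged toy illustration (names hypothetical) of why this works: nn.Linear stores its weight as (out_features, in_features) = (vocab_size, rnn_size), which is exactly the shape of an nn.Embedding table over the same vocabulary and dimension, so the two modules can share a single Parameter.

import torch.nn as nn

vocab_size, rnn_size = 10000, 512
embedding = nn.Embedding(vocab_size, rnn_size)   # weight shape: (10000, 512)
projection = nn.Linear(rnn_size, vocab_size)     # weight shape: (10000, 512)
projection.weight = embedding.weight             # one shared Parameter
assert projection.weight.data_ptr() == embedding.weight.data_ptr()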
Example no. 29
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a resumed
                    snapshot of a model from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size
        if model_opt.model_type == 'text' and \
           model_opt.enc_rnn_size != model_opt.dec_rnn_size:
            raise AssertionError("""We do not support different encoder and
                                     decoder rnn sizes for translation now.""")

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    if model_opt.refer:
        ref_dict = fields["ref"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'ref')
        ref_embeddings = build_embeddings(model_opt, ref_dict, feature_dicts)
        refer = build_encoder(model_opt, ref_embeddings)
    else:
        refer = None
    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder, refer)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
            gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward compatibility for models using the old custom LayerNorm parameter names
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = \
            {fix_key(k): v for (k, v) in checkpoint['model'].items()}
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
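The `fix_key` helper above renames LayerNorm parameters that older checkpoints stored as `a_2`/`b_2` to the standard `weight`/`bias` names before loading the state dict. A quick standalone check of that regex; the sample keys are made up for illustration.

import re

def fix_key(s):
    s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2', r'\1.layer_norm\2.bias', s)
    s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2', r'\1.layer_norm\2.weight', s)
    return s

print(fix_key('decoder.transformer_layers.0.layer_norm_1.a_2'))
# decoder.transformer_layers.0.layer_norm_1.weight
print(fix_key('encoder.layer_norm.b_2'))
# encoder.layer_norm.bias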
Example no. 30
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a resumed
                    snapshot of a model from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        "Unsupported model type %s" % model_opt.model_type

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size

    # Build encoder.
    if model_opt.model_type == "text":
        feat_fields = [fields[k]
                       for k in inputters.collect_features(fields, 'src')]
        src_emb = build_embeddings(model_opt, fields["src"], feat_fields)
        encoder = build_encoder(model_opt, src_emb)
    elif model_opt.model_type == "img":
        # why is build_encoder not used here?
        # why is the model_opt.__dict__ check necessary?
        if "image_channel_size" not in model_opt.__dict__:
            image_channel_size = 3
        else:
            image_channel_size = model_opt.image_channel_size

        encoder = ImageEncoder(
            model_opt.enc_layers,
            model_opt.brnn,
            model_opt.enc_rnn_size,
            model_opt.dropout,
            image_channel_size
        )
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(
            model_opt.rnn_type,
            model_opt.enc_layers,
            model_opt.dec_layers,
            model_opt.brnn,
            model_opt.enc_rnn_size,
            model_opt.dec_rnn_size,
            model_opt.audio_enc_pooling,
            model_opt.dropout,
            model_opt.sample_rate,
            model_opt.window_size
        )

    # Build decoder.
    feat_fields = [fields[k]
                   for k in inputters.collect_features(fields, 'tgt')]
    tgt_emb = build_embeddings(
        model_opt, fields["tgt"], feat_fields, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert fields['src'].vocab == fields['tgt'].vocab, \
            "preprocess with -share_vocab if you use share_embeddings"

        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    decoder = build_decoder(model_opt, tgt_emb)
    decoder2 = build_decoder(model_opt, tgt_emb)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    # model = onmt.models.NMTModel(encoder, decoder)
    model = onmt.models.KTransformerModel(encoder, decoder, decoder2)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
            gen_func
        )
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        vocab_size = len(fields["tgt"].vocab)
        pad_idx = fields["tgt"].vocab.stoi[fields["tgt"].pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward compatibility for models using the old custom LayerNorm parameter names
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {fix_key(k): v
                               for k, v in checkpoint['model'].items()}
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    model.generator = generator
    model.to(device)

    return model
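When both `-param_init` and `-param_init_glorot` are set, the initialization code above runs two passes: a uniform fill over every parameter, then Xavier (Glorot) initialization that overwrites the first pass for any tensor with more than one dimension, leaving biases with their uniform values. A minimal sketch of that two-stage behaviour on a single layer; the 0.1 scale is illustrative.

import torch.nn as nn
from torch.nn.init import xavier_uniform_

layer = nn.Linear(512, 512)
for p in layer.parameters():          # first pass: weight and bias
    p.data.uniform_(-0.1, 0.1)
for p in layer.parameters():          # second pass: only the weight (dim > 1)
    if p.dim() > 1:
        xavier_uniform_(p)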
Example no. 31
0
def build_base_model(model_opt, fields, gpu, checkpoint=None, gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a resumed
                    snapshot of a model from a stopped training run.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """

    # for back compat when attention_dropout was not defined
    try:
        model_opt.attention_dropout
    except AttributeError:
        model_opt.attention_dropout = model_opt.dropout

    # Build embeddings.
    if model_opt.model_type == "text" or model_opt.model_type == "vec":
        src_field = fields["src"]
        src_emb = build_embeddings(model_opt, src_field)
    else:
        src_emb = None

    # Build encoder.
    encoder = build_encoder(model_opt, src_emb)

    # Build decoder.
    tgt_field = fields["tgt"]
    tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"

        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    decoder = build_decoder(model_opt, tgt_emb)

    # Build NMTModel(= encoder + decoder).
    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu and not gpu_id:
        device = torch.device("cuda")
    elif not gpu:
        device = torch.device("cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size,
                      len(fields["tgt"].base_field.vocab)),
            Cast(torch.float32), gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)
        if model_opt.share_decoder_embeddings:
            generator.linear.weight = decoder.embeddings.word_lut.weight

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward compatibility for models using the old custom LayerNorm parameter names
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {
            fix_key(k): v
            for k, v in checkpoint['model'].items()
        }
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)

    model.generator = generator

    #if model_opt.teacher_forcing != "teacher":
    decoder.set_vocab_size(len(fields["tgt"].base_field.vocab))
    decoder.set_generator(model.generator)

    model.to(device)
    if model_opt.model_dtype == 'fp16' and model_opt.optim == 'fusedadam':
        model.half()
    return model
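This variant inserts a `Cast(torch.float32)` step between the generator's linear layer and the LogSoftmax, so that when the model is run in fp16 the log-probabilities are still computed in float32. The stand-in below only illustrates the idea (the real module lives in onmt.modules; this toy version is an assumption, not its actual implementation).

import torch
import torch.nn as nn

class Cast(nn.Module):
    # Toy stand-in: cast the input tensor to a fixed dtype in forward().
    def __init__(self, dtype):
        super().__init__()
        self.dtype = dtype

    def forward(self, x):
        return x.to(self.dtype)

x = torch.randn(4, 512, dtype=torch.float16)
print(Cast(torch.float32)(x).dtype)   # torch.float32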
Example no. 32
0
def build_base_model(model_opt, fields, gpu, checkpoint=None, gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a resumed
                    snapshot of a model from a stopped training run.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """

    # Build embeddings.
    if model_opt.model_type == "text":
        src_field = fields["src"]
        src_emb = build_embeddings(model_opt, src_field)
    else:
        src_emb = None

    # Build encoder.
    encoder = build_encoder(model_opt, src_emb)

    # Build decoder.
    tgt_field = fields["tgt"]
    tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"

        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    decoder = build_decoder(model_opt, tgt_emb)

    # Build NMTModel(= encoder + decoder).
    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu and not gpu_id:
        device = torch.device("cuda")
    elif not gpu:
        device = torch.device("cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size,
                      len(fields["tgt"].base_field.vocab)),
            Cast(torch.float32),
            gen_func
        )
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward compatibility for models using the old custom LayerNorm parameter names
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {fix_key(k): v
                               for k, v in checkpoint['model'].items()}
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)

    model.generator = generator
    model.to(device)
    if model_opt.model_dtype == 'fp16':
        model.half()

    return model
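A hedged usage sketch of how a builder like the one above is typically invoked from a loading or translation script. The checkpoint path and the 'opt'/'vocab' keys are assumptions about how the training run saved its state, not something shown in the examples themselves.

import torch

checkpoint = torch.load("model_step_10000.pt",
                        map_location=lambda storage, loc: storage)
model_opt = checkpoint["opt"]      # assumed: training opts saved in the checkpoint
fields = checkpoint["vocab"]       # assumed: Field objects saved in the checkpoint
model = build_base_model(model_opt, fields,
                         gpu=torch.cuda.is_available(),
                         checkpoint=checkpoint)
model.eval()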