Ejemplo n.º 1
0
def make_audio_text_model_from_text(model_opt,
                                    fields,
                                    text_fields,
                                    gpu,
                                    checkpoint=None):
    """Build a (base, text, speech) model triple from a text base model.

    Args:
        model_opt: the options loaded from checkpoint.
        fields: `Field` objects for the base (audio) model.
        text_fields: `Field` objects for the text-side model.
        gpu (bool): whether to place the models on GPU.
        checkpoint: optional snapshot dict holding 'model', 'generator' and
            (possibly) 'speech_model' state dicts.

    Returns:
        (model, text_model, speech_model) tuple.
    """
    model = make_base_model(model_opt, fields, gpu, None)

    # Build a text encoder over the text-side source vocabulary.
    src_dict = text_fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(text_fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    text_encoder = make_encoder(model_opt, src_embeddings)

    generator = model.generator
    # Guard the load: the original dereferenced `checkpoint['generator']`
    # unconditionally, crashing when the default `checkpoint=None` was used.
    if checkpoint is not None:
        generator.load_state_dict(checkpoint['generator'])

    # The text model reuses the base model's decoder.
    text_model = NMTModel(text_encoder, model.decoder)
    text_model.model_type = 'text'
    # Detach the generator so it is not re-initialized below.
    text_model.decoder.set_generator(None)

    # Older option sets may lack `conv_global_encoder`; fall back to the
    # recurrent global encoder in that case.  (Replaces a bare `except:`
    # that would also have masked genuine construction errors.)
    if getattr(model_opt, 'conv_global_encoder', False):
        global_speech_encoder = ConvGlobalAudioEncoder(
            model_opt.enc_layers, model_opt.brnn, model_opt.rnn_size,
            model_opt.dropout, model_opt.sample_rate,
            model_opt.window_size)
    else:
        global_speech_encoder = GlobalAudioEncoder(model_opt.enc_layers,
                                                   model_opt.brnn,
                                                   model_opt.rnn_size,
                                                   model_opt.dropout,
                                                   model_opt.sample_rate,
                                                   model_opt.window_size)

    # Python 3 print function (original used a Python 2 print statement).
    print("ff:", model_opt.ff_speech_decoder)

    if model_opt.ff_speech_decoder:
        speech_decoder = FFAudioDecoder(model_opt.rnn_size * 3,
                                        model_opt.rnn_size,
                                        model_opt.dec_layers)
    else:
        speech_decoder = AudioDecoder(model_opt.rnn_type, model_opt.brnn,
                                      model_opt.dec_layers, model_opt.rnn_size,
                                      model_opt.global_attention,
                                      model_opt.coverage_attn,
                                      model_opt.context_gate,
                                      model_opt.copy_attn, model_opt.dropout)

    speech_model = SpeechModel(model.encoder, global_speech_encoder,
                               speech_decoder)

    if model_opt.param_init != 0.0:
        print('Initializing model parameters.')
        for p in text_model.parameters():
            p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        for p in speech_model.parameters():
            p.data.uniform_(-model_opt.param_init, model_opt.param_init)
    if hasattr(text_model.encoder, 'embeddings'):
        text_model.encoder.embeddings.load_pretrained_vectors(
            model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        text_model.load_state_dict(checkpoint['model'])
        # `dict.has_key` is Python 2 only; use the `in` operator.
        if 'speech_model' in checkpoint \
                and checkpoint['speech_model'] is not None:
            print('  Loading speech model parameters')
            speech_model.load_state_dict(checkpoint['speech_model'])

    # Add generator to model (this registers it as parameter of model).
    text_model.decoder.set_generator(generator)
    text_model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        text_model.cuda()
        speech_model.cuda()
    else:
        text_model.cpu()
        speech_model.cpu()

    return model, text_model, speech_model
Ejemplo n.º 2
0
def make_auto_model(model_opt, fields, gpu, checkpoint=None):
    """Build a pair of autoencoder models, one per language side.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: (src_fields, tgt_fields) pair of `Field` dicts.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        (src_auto_model, tgt_auto_model) tuple of NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    src_fields = fields[0]
    tgt_fields = fields[1]

    # Source-side autoencoder: encoder and decoder over the source vocab.
    src_dict = src_fields["src"].vocab
    feature_dicts = onmt.IO.collect_feature_dicts(src_fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    src_encoder = make_encoder(model_opt, src_embeddings)
    # Separate embedding instance for the decoder side.
    src_embeddings = make_embeddings(model_opt,
                                     src_dict,
                                     feature_dicts,
                                     for_encoder=False)
    src_decoder = make_decoder(model_opt, src_embeddings)

    # Target-side autoencoder: same construction over the target vocab.
    tgt_dict = tgt_fields["src"].vocab
    feature_dicts = onmt.IO.collect_feature_dicts(tgt_fields, 'src')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts)
    tgt_encoder = make_encoder(model_opt, tgt_embeddings)
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)
    tgt_decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    src_auto_model = NMTModel(src_encoder, src_decoder)
    tgt_auto_model = NMTModel(tgt_encoder, tgt_decoder)

    # Generators project decoder states onto the respective vocabularies.
    src_generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, len(src_fields["tgt"].vocab)),
        nn.LogSoftmax())
    tgt_generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, len(tgt_fields["tgt"].vocab)),
        nn.LogSoftmax())

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # JD TODO: FIX THIS!
        print('Loading model parameters.')
        src_auto_model.load_state_dict(checkpoint['src_auto_model'])
        tgt_auto_model.load_state_dict(checkpoint['tgt_auto_model'])
        src_generator.load_state_dict(checkpoint['src_generator'])
        tgt_generator.load_state_dict(checkpoint['tgt_generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in src_encoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in src_decoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in tgt_encoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in tgt_decoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)

            src_encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
            src_decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
            tgt_encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
            tgt_decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

            for p in src_generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in tgt_generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)

    # Add generator to model (this registers it as parameter of model).
    src_auto_model.generator = src_generator
    tgt_auto_model.generator = tgt_generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        src_auto_model.cuda()
        # BUG FIX: the original called tgt_auto_model.cpu() here, leaving
        # the target autoencoder on CPU when GPU was requested.
        tgt_auto_model.cuda()
    else:
        src_auto_model.cpu()
        tgt_auto_model.cpu()

    return src_auto_model, tgt_auto_model
def make_base_model(model_opt,
                    fields,
                    gpu,
                    checkpoint=None,
                    stage1=True,
                    basic_enc_dec=False):
    """Build the second-stage NMTModel (encoder + decoder + copy generator).

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        stage1(bool): whether this is the stage-1 model of a two-stage setup.
        basic_enc_dec(bool): build a plain encoder/decoder over the
            second-stage fields.
    Returns:
        the NMTModel.
    """
    if stage1 and not basic_enc_dec:
        # Unsupported configuration.  Raise explicitly instead of the
        # original `assert False`, which `python -O` would strip, silently
        # letting the dead "src1"/"tgt1" branch run.
        raise AssertionError(
            'stage1 without basic_enc_dec is not supported')
    src = "src2"
    tgt = "tgt2"
    src_hist = "src1_hist" if (basic_enc_dec or stage1) else None
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields[src].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, src)

        # Text encoder additionally consumes a hierarchical history vocab.
        src_embeddings = make_embeddings(model_opt,
                                         src_dict,
                                         feature_dicts,
                                         hist_dict=fields[src_hist].vocab,
                                         use_hier_hist=True)

        encoder = make_encoder(model_opt, src_embeddings, stage1,
                               basic_enc_dec)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields[tgt].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, tgt)
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, stage1
                           and not basic_enc_dec, basic_enc_dec)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator (always a copy generator over the stage-2 target vocab).
    generator = CopyGenerator(model_opt.rnn_size, fields["tgt2"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 4
0
def make_base_model(model_opt, src_dict, tgt_dict, gpu, checkpoint=None):
    """Build an NMTModel from explicit source/target dictionaries.

    Args:
        model_opt: the option loaded from checkpoint.
        src_dict: source vocabulary.
        tgt_dict: target vocabulary.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    src_embeddings = make_embeddings(model_opt, src_dict, for_encoder=True)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, for_encoder=False)
    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        # dim=-1 makes the softmax axis explicit (the no-arg form is
        # deprecated and warns on modern PyTorch).
        generator = nn.Sequential(nn.Linear(model_opt.rnn_size, len(tgt_dict)),
                                  nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, tgt_dict)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 5
0
def make_base_model(model_opt,
                    fields,
                    gpu,
                    checkpoint=None,
                    init_encoder=False,
                    rev_checkpoint=None,
                    top_layer=100):
    """Build an NMTModel, optionally initializing and freezing the encoder
    from a checkpoint.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        init_encoder(bool): when True, only encoder parameters (up to
            `top_layer`) are copied from `checkpoint` and then frozen.
        rev_checkpoint: reverse-direction checkpoint (currently unused).
        top_layer(int): highest encoder layer index to initialize.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)
    if model_opt.share_rnn:
        if model_opt.input_feed == 1:
            raise AssertionError('Cannot share encoder and decoder weights '
                                 'when using input feed in decoder')
        # BUG FIX: the original compared src_word_vec_size with itself,
        # so the size-mismatch guard could never trigger.
        if model_opt.src_word_vec_size != model_opt.tgt_word_vec_size:
            raise AssertionError('Cannot share encoder and decoder weights '
                                 'if embeddings are different sizes')
        encoder.rnn = decoder.rnn

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None and not init_encoder:
        print('Loading model parameters from checkpoint.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
        if init_encoder:
            model_dict = checkpoint['model']

            # Select encoder keys at or below `top_layer`.  NOTE(review):
            # the layer index is parsed from fixed character positions in
            # the key name — fragile; assumes single-digit layer numbers.
            model_dict_keys = []
            for key in model_dict.keys():
                if key[:7] == 'encoder':
                    if key[-7:] == 'reverse':
                        if int(key[-9]) > top_layer:
                            continue
                    else:
                        if key[8:18] != 'embeddings' and int(
                                key[-1]) > top_layer:
                            continue
                    model_dict_keys.append(key)
            print(model_dict_keys)

            # Load the selected encoder parameters into a fresh state dict.
            new_model_dict = model.state_dict()
            for key, value in model_dict.items():
                if key in model_dict_keys:
                    new_model_dict[key] = value
            model.load_state_dict(new_model_dict)

            # Freeze the parameters that were loaded from the checkpoint.
            for name, param in model.named_parameters():
                if name in model_dict_keys:
                    param.requires_grad = False

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 6
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build an NMTModel (encoder + decoder + generator), optionally with a
    second "ReWE" generator head that regresses target word embeddings.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder: text uses embeddings + RNN; img/audio use dedicated
    # convolutional/recurrent encoders.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.ReWE:
            # ReWE head: 2-layer MLP from decoder state to the target
            # embedding space (no softmax — it regresses embeddings).
            generator_ReWE = nn.Sequential(
                nn.Linear(model_opt.rnn_size, 200), nn.ReLU(),
                nn.Linear(200, model_opt.tgt_word_vec_size))  #,
            #nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
            if model_opt.emb_regularizer:
                # NOTE(review): this ties the word-LUT weight to a
                # Linear(rnn_size, 200) layer — the shapes look
                # incompatible; confirm against the training code.
                generator_ReWE[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)
        if model_opt.ReWE:
            generator_ReWE = CopyGenerator(model_opt.rnn_size,
                                           fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        if model_opt.ReWE:
            generator_ReWE.load_state_dict(checkpoint['generator_regularizer'])

        # Pass None as the vectors path: only (optionally) fix the
        # already-loaded embeddings, do not overwrite them.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                None, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                None, model_opt.fix_word_vecs_dec)
    else:
        # Fresh model: uniform and/or Glorot initialization per options.
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            if model_opt.ReWE:
                for p in generator_ReWE.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
        if model_opt.param_init_glorot:
            # Only matrices (dim > 1) get Xavier init; biases are skipped.
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            if model_opt.ReWE:
                for p in generator_ReWE.parameters():
                    if p.dim() > 1:
                        xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    if model_opt.ReWE:
        model.generator_ReWE = generator_ReWE

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 7
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu: Boolean: whether to use gpu.
        checkpoint: the snapshot model.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Encoder: text models embed the source tokens; anything else is
    # treated as an image and gets the CNN encoder.
    if model_opt.model_type == "text":
        vocab_src = fields["src"].vocab
        feat_dicts = ONMTDataset.collect_feature_dicts(fields)
        emb_src = make_embeddings(model_opt, vocab_src, feat_dicts)
        encoder = make_encoder(model_opt, emb_src)
    else:
        encoder = ImageEncoder(model_opt.layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)

    # Decoder over the target vocabulary.
    vocab_tgt = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    emb_tgt = make_embeddings(model_opt, vocab_tgt, [], for_encoder=False)
    decoder = make_decoder(model_opt, emb_tgt)

    # Assemble the sequence-to-sequence model.
    model = NMTModel(encoder, decoder)

    # Generator: copy-attention variant, or a plain projection + log-softmax.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)
    else:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight

    # Restore saved parameters when resuming from a snapshot.
    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # add the generator to the module (does this register the parameter?)
    model.generator = generator

    # Move the whole model to the requested device.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 8
0
def make_base_model(model_opt, fields, gpu, checkpoint=None, train_part="all"):
    """Build an NMTModel (encoder + decoder + document context) from options.

    Args:
        model_opt: the options loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
                    snapshot model from a stopped training.
        train_part (str): "all" trains every parameter; "context" loads the
                          main model from the checkpoint, freezes it, and
                          trains only the `doc_context` parameters.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)
    context = make_context(model_opt, tgt_dict)

    # Make NMTModel(= encoder + decoder).  When RISK training is enabled,
    # the model also needs a beam-search scorer and decoding options.
    if model_opt.RISK_ratio > 0.0:
        scorer = onmt.translate.GNMTGlobalScorer(model_opt.alpha,
                                                 model_opt.beta,
                                                 model_opt.coverage_penalty,
                                                 model_opt.length_penalty)
        model = NMTModel(encoder,
                         decoder,
                         context,
                         context_type=model_opt.context_type,
                         tgt_vocab=fields['tgt'].vocab,
                         beam_size=model_opt.beam_size,
                         n_best=model_opt.n_best,
                         gpu=gpu,
                         scorer=scorer,
                         min_length=model_opt.min_length,
                         max_length=model_opt.max_length,
                         stepwise_penalty=model_opt.stepwise_penalty,
                         block_ngram_repeat=model_opt.block_ngram_repeat,
                         ignore_when_blocking=model_opt.ignore_when_blocking,
                         copy_attn=model_opt.copy_attn,
                         context_size=model_opt.context_size)
    else:
        model = NMTModel(encoder,
                         decoder,
                         context,
                         context_type=model_opt.context_type)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model_dict = checkpoint['model']
        if train_part == "context":
            # Start from the current (randomly initialized) state dict and
            # overwrite only the entries present in the checkpoint, so the
            # doc-context parameters keep their fresh initialization.
            model_dict = model.state_dict()
            if 'join' in model_opt.context_type:
                pretrained_dict = {}
                for k, v in checkpoint['model'].items():
                    if k in model_dict:
                        # The 'join' context wraps doc_context in a list, so
                        # checkpoint keys need the extra '.0' level.
                        if 'doc_context' in k:
                            k = k.replace('doc_context', 'doc_context.0')
                        pretrained_dict[k] = v
            else:
                pretrained_dict = {
                    k: v
                    for k, v in checkpoint['model'].items()
                    if k in model_dict and 'doc_context' not in k
                }
            model_dict.update(pretrained_dict)

        model.load_state_dict(model_dict, strict=False)
        generator.load_state_dict(checkpoint['generator'])
        if train_part == "context":
            print("Freezing parameters of main model")
            # NOTE: fixed from `param.require_grad` (a typo that silently
            # created a dead attribute instead of freezing anything).
            for param in model.parameters():
                param.requires_grad = False
            for param in generator.parameters():
                param.requires_grad = False
            print("Unfreezing parameters of context")
            for param in model.doc_context.parameters():
                param.requires_grad = True
                if model_opt.param_init != 0.0:
                    param.data.uniform_(-model_opt.param_init,
                                        model_opt.param_init)
                if model_opt.param_init_glorot:
                    if param.dim() > 1:
                        xavier_uniform(param)
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 9
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel, optionally with char-level composed tgt embeddings.

    Args:
        model_opt: the options loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
                    snapshot model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        print('Collecting feature vocabularies...')
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        print('Building source embeddings...')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        print('Building an encoder...')
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')

    if model_opt.char_compos_type == 'none':

        tgt_embeddings = make_embeddings(model_opt,
                                         tgt_dict,
                                         feature_dicts,
                                         for_encoder=False)
        print('Using standard embeddings')

    else:
        # Compose target word embeddings from character spellings; the
        # composition type string is a '-'-separated list of sub-models.
        spells = getVocabSpell(tgt_dict, gpu)
        embedding_dim = model_opt.tgt_word_vec_size

        chanQty = model_opt.char_comp_cnn_chan_qty
        dropout = model_opt.dropout
        numLayers = model_opt.char_comp_rnn_layer

        configs = {}

        for compType in model_opt.char_compos_type.split('-'):

            if compType == 'brnn':
                configs['brnn'] = {
                    'numLayers': numLayers,
                    'embedSize': embedding_dim
                }
            elif compType == 'rnn':
                configs['rnn'] = {
                    'numLayers': numLayers,
                    'embedSize': embedding_dim
                }
            elif compType == 'cnn':
                configs['cnn'] = {
                    'chanQty': chanQty,
                    'embedSize': embedding_dim
                }
            elif compType == 'wembed':
                configs['wembed'] = {'embedSize': embedding_dim}
            else:
                raise Exception('Invalid composition type: ' + compType)

        tgt_embeddings = Char2VecComposite(
            spells,
            configs,
            charEmbedSize=model_opt.char_embed_size,
            dropout=dropout)

        print('Using char-level composition embeddings of type %s' %
              model_opt.char_compos_type)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            print('Loading pre-trained source vectors: %s/%s' %
                  (model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc))
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        # Char-composed embeddings have no word lookup table to seed, so
        # pre-trained target vectors only apply to the 'none' case.
        if hasattr(model.decoder,
                   'embeddings') and model_opt.char_compos_type == 'none':
            # Fixed copy-paste bug: this branch loads *target* vectors but
            # previously logged "source vectors".
            print('Loading pre-trained target vectors: %s/%s' %
                  (model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec))
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        print('Moving model to CUDA')
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 10
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel, tolerating checkpoints with mismatched state dicts.

    When the checkpoint's keys differ from the freshly built model (e.g. a
    changed vocabulary), the overlapping parameters are loaded and the
    remaining embedding / output-layer parameters are re-initialized.

    Args:
        model_opt: the options loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
                    snapshot model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')

        state = model.state_dict()
        if state.keys() == checkpoint["model"].keys():
            model.load_state_dict(checkpoint["model"])
        else:
            # Partial load: take whatever overlaps, then re-initialize the
            # embedding parameters that the checkpoint could not cover.
            state.update(checkpoint["model"])
            model.load_state_dict(state)
            for name, param in model.named_parameters():
                if "embedding" in name:
                    if model_opt.param_init != 0.0:
                        param.data.uniform_(-model_opt.param_init,
                                            model_opt.param_init)
                    # NOTE: fixed from `xavier_uniform(p)` — `p` was an
                    # undefined/stale name; the loop variable is `param`.
                    if model_opt.param_init_glorot:
                        if param.dim() > 1:
                            xavier_uniform(param)

        state = generator.state_dict()
        if state.keys() == checkpoint["generator"].keys():
            generator.load_state_dict(checkpoint["generator"])
        else:
            # Same partial-load strategy for the generator's output layer.
            state.update(checkpoint["generator"])
            generator.load_state_dict(state)
            for name, param in generator.named_parameters():
                if "linear.bias" in name or "linear.weight" in name:
                    if model_opt.param_init != 0.0:
                        param.data.uniform_(-model_opt.param_init,
                                            model_opt.param_init)
                    if model_opt.param_init_glorot:
                        if param.dim() > 1:
                            xavier_uniform(param)
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 11
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build a multimodal NMTModel: text encoder + decoder + image encoder.

    Args:
        model_opt: the options loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
                    snapshot model from a stopped training.
    Returns:
        the NMTModel.
    """
    # Only the "text" path builds an encoder in this variant; the original
    # assert also admitted "img"/"audio" but those branches were missing,
    # so `encoder` would be unbound and crash later with a NameError.
    # Fail early with a clear message instead.
    assert model_opt.model_type == "text", \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    src_dict = fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict,
                                     feature_dicts)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make the image-feature encoder, if any.
    if model_opt.image_feat_type == 'local':
        # Use the local image features "as is": the encoder only reshapes
        # them, mapping feat_size to rnn_size through one linear layer.
        encoder_img = make_encoder_image_local_features(model_opt)
    elif model_opt.image_feat_type == 'global':
        # Transform global image features before using them.
        encoder_img = make_encoder_image_global_features(model_opt)
    else:
        encoder_img = None

    # Make NMTModel(= encoder + decoder + image encoder).
    model = NMTModel(encoder, decoder, encoder_img)

    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 12
0
def make_base_model(model_opt, fields, gpu, checkpoint=None, back_model=None):
    """Build an NMTModel with a second (backward) target generator.

    Args:
        model_opt: the options loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
                    snapshot model from a stopped training.
        back_model: reserved for loading a pretrained backward decoder;
                    currently unused.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.  Two target embedding tables: one for the forward
    # decoder, one for the backward decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)
    bk_tgt_embeddings = make_embeddings(model_opt,
                                        tgt_dict,
                                        feature_dicts,
                                        for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight
        bk_tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, bk_tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator (and its backward counterpart).
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        bk_generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
            bk_generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)
        # BUGFIX: `bk_generator` was previously undefined on this branch,
        # raising NameError below whenever copy_attn was combined with
        # share_gen=False.  Build a backward copy generator as well.
        bk_generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generators to model (this registers them as parameters of model).
    model.generator = generator
    if model_opt.share_gen:
        # Forward and backward decoders share one output layer.
        model.bk_generator = generator
    else:
        model.bk_generator = bk_generator
    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 13
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel, optionally wrapped with multimodal components.

    The multimodal flavor is selected via substrings of
    `model_opt.multimodal_model_type` ('generator', 'bank', 'imgw', 'dcap');
    the attribute may be absent entirely, in which case a plain NMTModel is
    built.

    Args:
        model_opt: the options loaded from a checkpoint.
        fields: `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
                    snapshot model from a stopped training.
    Returns:
        the NMTModel (or MultiModalNMTModel).
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Old checkpoints may lack the multimodal options; default them off.
    try:
        mmod_generator = 'generator' in model_opt.multimodal_model_type
        mmod_bank = 'bank' in model_opt.multimodal_model_type
        mmod_imgw = 'imgw' in model_opt.multimodal_model_type
        mmod_dcap = 'dcap' in model_opt.multimodal_model_type
        mmod_model = mmod_bank or mmod_imgw or mmod_dcap
    except AttributeError:
        mmod_generator = False
        mmod_bank = False
        mmod_imgw = False
        mmod_model = False
        mmod_dcap = False
    try:
        mmod_generator_add = model_opt.mmod_generator_add
    except AttributeError:
        mmod_generator_add = 0.0
    try:
        mmod_use_hidden = model_opt.mmod_use_hidden
    except AttributeError:
        mmod_use_hidden = False

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings, mmod_imgw=mmod_imgw)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(
        model_opt, tgt_dict, feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, mmod_dcap)

    # Make Model
    if mmod_model:
        if mmod_bank:
            bridge = multimodal.MultiModalMemoryBankGate(
                model_opt.rnn_size, model_opt.img_feat_dim, add=mmod_generator_add)
        else:
            bridge = None
        model = multimodal.MultiModalNMTModel(encoder, bridge, decoder, imgw=mmod_imgw,
                                              num_capsules=model_opt.num_capsules, num_regions=model_opt.num_regions,
                                              dcap=mmod_dcap)
    else:
        # Make NMTModel(= encoder + decoder).
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)
    else:
        print('creating a Sequential generator')
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # BUGFIX: was `word_lut_weight`, a nonexistent attribute — the
            # weight-tying raised AttributeError instead of sharing the
            # embedding matrix with the output layer.
            generator[0].weight = decoder.embeddings.word_lut.weight
    if mmod_generator:
        print('wrapping in a MultiModalGenerator')
        generator = onmt.modules.multimodal.MultiModalGenerator(
            generator, model_opt.img_feat_dim,
            add=mmod_generator_add, use_hidden=mmod_use_hidden)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 14
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the full NMTModel (encoder + decoder + generator) from options.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = []
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)

    # Make Generator. Note: no softmax inside the Sequential here; the
    # model exposes separate `logsoftmax`/`softmax` modules below.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder input embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        # Guard with hasattr: ImageEncoder has no `embeddings` attribute,
        # so the unconditional access would crash for model_type == "img".
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
    # Specify `dim` explicitly: the implicit-dim form is deprecated, and the
    # rest of this file uses nn.LogSoftmax(dim=-1).
    logsoftmax = nn.LogSoftmax(dim=-1)
    softmax = nn.Softmax(dim=-1)
    # add the generator to the module (registers it as a submodule,
    # so its parameters belong to `model`)
    model.generator = generator
    model.logsoftmax = logsoftmax
    model.softmax = softmax
    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 15
0
def make_base_model(opt, model_opt, fields, checkpoint=None):
    """
    Build the full NMTModel (encoder + decoder + generator) from options.

    Args:
        opt: the option in current environment.
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        checkpoint: the snapshot model.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make Encoder.
    if model_opt.model_type == "text":
        src_vocab = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        feats_padding_idx = [
            feat_dict.stoi[onmt.IO.PAD_WORD] for feat_dict in feature_dicts
        ]
        num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]
        src_embeddings = make_embeddings(
            model_opt,
            src_vocab.stoi[onmt.IO.PAD_WORD],
            feats_padding_idx,
            len(src_vocab),
            for_encoder=True,
            num_feat_embeddings=num_feat_embeddings)

        encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)

    # Make Decoder.
    tgt_vocab = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible
    feats_padding_idx = []
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_vocab.stoi[onmt.IO.PAD_WORD],
                                     feats_padding_idx,
                                     len(tgt_vocab),
                                     for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= Encoder + Decoder).
    model = NMTModel(encoder, decoder)

    # Make Generator.
    if not model_opt.copy_attn:
        # dim=-1 made explicit: the implicit-dim form is deprecated and the
        # rest of this file uses nn.LogSoftmax(dim=-1).
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder input embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint.
    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Make the whole model leverage GPU if indicated to do so.
    if hasattr(opt, 'gpuid'):
        cuda = len(opt.gpuid) >= 1
    elif hasattr(opt, 'gpu'):
        cuda = opt.gpu > -1
    else:
        cuda = False

    if cuda:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()
    # Attach after moving to device so both end up on the same device.
    model.generator = generator

    return model
Ejemplo n.º 16
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the full NMTModel (encoder + decoder + generator) from options.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    # NOTE(review): src_dict/src_embeddings only exist on the "text" path;
    # share_embeddings with img/audio would raise NameError here — confirm
    # option validation upstream rules that combination out.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        # dim=-1 made explicit: the implicit-dim form is deprecated and the
        # rest of this file uses nn.LogSoftmax(dim=-1).
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder input embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot/Xavier init on matrices only (dim > 1 skips biases).
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        # img/audio encoders have no `embeddings`, hence the hasattr guard.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 17
0
def make_base_model(model_opt, fields, gpu, checkpoint=None, stage1=True):
    """
    Build the full NMTModel (encoder + decoder + generator) for one of two
    training stages, each with its own src/tgt field pair.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        stage1(bool): if True use the "src1"/"tgt1" fields and a plain
                      generator; otherwise "src2"/"tgt2" and a CopyGenerator.
    Returns:
        the NMTModel.
    """
    if stage1:
        src = "src1"
        tgt = "tgt1"
    else:
        src = "src2"
        tgt = "tgt2"
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields[src].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, src)
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)

        table_embeddings = make_embeddings(model_opt,
                                           src_dict,
                                           feature_dicts,
                                           discard_word=True)
        # Reuse the same embedding weights between the src and table
        # embeddings (weight tying, not a copy).
        table_embeddings.word_lut.weight = src_embeddings.word_lut.weight
        table_embeddings.field_lut.weight = src_embeddings.field_lut.weight
        table_embeddings.type_lut.weight = src_embeddings.type_lut.weight
        table_embeddings.ha_lut.weight = src_embeddings.ha_lut.weight

        encoder = make_encoder(model_opt, (src_embeddings, table_embeddings),
                               stage1)

    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields[tgt].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, tgt)
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    # NOTE(review): src_dict/src_embeddings only exist on the "text" path;
    # share_embeddings with img/audio would raise NameError here.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # NOTE: make decoder
    decoder = make_decoder(model_opt, tgt_embeddings, stage1)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if stage1:
        # dim=-1 made explicit: the implicit-dim form is deprecated and the
        # rest of this file uses nn.LogSoftmax(dim=-1).
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt1"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder input embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        # NOTE: CopyGenerator
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt2"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        # img/audio encoders have no `embeddings`, hence the hasattr guard.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 18
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the full NMTModel (encoder + decoder + generator) from options.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    # NOTE(review): src_embeddings only exists on the "text" path;
    # share_embeddings with img/audio would raise NameError here.
    if model_opt.share_embeddings:
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        # dim=-1 made explicit: the implicit-dim form is deprecated and the
        # rest of this file uses nn.LogSoftmax(dim=-1).
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder input embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        # img/audio encoders have no `embeddings`, hence the hasattr guard.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Ejemplo n.º 19
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build a text-only NMTModel (encoder + decoder + generator) from options.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: a saved training snapshot to restore, or None to
                    initialize parameters from scratch.
    Returns:
        the NMTModel.
    """
    src_dict = fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        # dim=-1 made explicit: the implicit-dim form is deprecated and the
        # rest of this file uses nn.LogSoftmax(dim=-1).
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder input embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot/Xavier init on matrices only (dim > 1 skips biases).
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model