Example #1
def make_G_model(model_opt, use_cuda=True, checkpoint=None):

    src_embeddings = continuousEmbedding(model_opt.src_vocab_size,
                                         model_opt.embedding_dim, data.PAD_idx)
    tgt_embeddings = continuousEmbedding(model_opt.tgt_vocab_size,
                                         model_opt.embedding_dim, data.PAD_idx)
    encoder = onmt.ModelConstructor.make_encoder(model_opt, src_embeddings)
    decoder = onmt.ModelConstructor.make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = "text"

    # Make Generator.
    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, model_opt.tgt_vocab_size),
        nn.LogSoftmax(dim=-1))

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing G model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

    model.generator = generator
    # Make the whole model leverage GPU if indicated to do so.
    if use_cuda:
        model.cuda()

    return model
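
A minimal usage sketch for the factory above, assuming model_opt is an argparse-style namespace; the attribute values below are illustrative placeholders, not defaults from any real config:

from argparse import Namespace

# Hypothetical options; the names mirror the attributes make_G_model
# reads above (plus whatever make_encoder/make_decoder expect).
model_opt = Namespace(src_vocab_size=32000, tgt_vocab_size=32000,
                      embedding_dim=512, rnn_size=512,
                      param_init=0.1, param_init_glorot=False)

# Build a freshly initialized G model on CPU; pass a loaded
# checkpoint dict instead of None to restore trained weights.
model = make_G_model(model_opt, use_cuda=False, checkpoint=None)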
Example #2
def make_base_model(model_opt, fields, gpu, checkpoint=None):

    src_dict = fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('`-share_vocab` must be set during '
                                 'preprocessing when using share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
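
All of these constructors expect checkpoint to be a dict holding at least a 'model' and a 'generator' state dict. A sketch of the matching save/load round trip (the file name is illustrative):

import torch

# Saving side, e.g. at the end of a training epoch:
torch.save({'model': model.state_dict(),
            'generator': generator.state_dict()},
           'checkpoint.pt')

# Loading side, as consumed by make_base_model(..., checkpoint=...):
checkpoint = torch.load('checkpoint.pt', map_location='cpu')
model = make_base_model(model_opt, fields, gpu=False, checkpoint=checkpoint)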
Example #3
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required
    if model_opt.share_embeddings:
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
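
The share_embeddings and share_decoder_embeddings branches above tie two modules to one weight tensor, so both read and update the same parameters. A self-contained sketch of the same trick in plain PyTorch (toy sizes, hypothetical names):

import torch.nn as nn

vocab, dim = 100, 16
embedding = nn.Embedding(vocab, dim)   # the word lookup table
projection = nn.Linear(dim, vocab)     # the generator's first layer

# nn.Linear stores its weight as (out_features, in_features) = (vocab, dim),
# which matches nn.Embedding's (vocab, dim), so the Parameter can be shared:
projection.weight = embedding.weight

# Both modules now point at the same storage.
assert projection.weight.data_ptr() == embedding.weight.data_ptr()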
Example #4
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        src_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         src_feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    tgt_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     tgt_feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('`-share_vocab` must be set during '
                                 'preprocessing when using share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make inference network.
    inference_network = make_inference_network(
        model_opt, src_embeddings, tgt_embeddings, src_dict, src_feature_dicts,
        tgt_dict, tgt_feature_dicts
    ) if model_opt.inference_network_type != "none" else None

    if model_opt.prior_normalization == "bnshare":
        decoder.attn.bn_mu = inference_network.bn_mu
        decoder.attn.bn_std = inference_network.bn_std

    # Make NMTModel(= encoder + decoder + inference network).
    model = (NMTModel(encoder, decoder, None, "none")
             if inference_network is None else ViNMTModel(
                 encoder,
                 decoder,
                 inference_network,
                 dist_type=model_opt.dist_type,
                 use_prior=model_opt.use_prior > 0))
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example #5
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = []
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
    logsoftmax = nn.LogSoftmax(dim=-1)
    softmax = nn.Softmax(dim=-1)
    # Add generator to model (attribute assignment registers it as a submodule).
    model.generator = generator
    model.logsoftmax = logsoftmax
    model.softmax = softmax
    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example #6
def make_audio_text_model_from_text(model_opt,
                                    fields,
                                    text_fields,
                                    gpu,
                                    checkpoint=None):
    model = make_base_model(model_opt, fields, gpu, None)

    src_dict = text_fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(text_fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    text_encoder = make_encoder(model_opt, src_embeddings)

    generator = model.generator
    if checkpoint is not None:
        generator.load_state_dict(checkpoint['generator'])

    text_model = NMTModel(text_encoder, model.decoder)
    text_model.model_type = 'text'
    text_model.decoder.set_generator(None)

    try:
        if model_opt.conv_global_encoder:
            global_speech_encoder = ConvGlobalAudioEncoder(
                model_opt.enc_layers, model_opt.brnn, model_opt.rnn_size,
                model_opt.dropout, model_opt.sample_rate,
                model_opt.window_size)
        else:
            global_speech_encoder = GlobalAudioEncoder(model_opt.enc_layers,
                                                       model_opt.brnn,
                                                       model_opt.rnn_size,
                                                       model_opt.dropout,
                                                       model_opt.sample_rate,
                                                       model_opt.window_size)
    except AttributeError:  # model_opt may predate conv_global_encoder
        global_speech_encoder = GlobalAudioEncoder(
            model_opt.enc_layers, model_opt.brnn, model_opt.rnn_size,
            model_opt.dropout, model_opt.sample_rate, model_opt.window_size)

    print "ff:", model_opt.ff_speech_decoder

    if model_opt.ff_speech_decoder:
        speech_decoder = FFAudioDecoder(model_opt.rnn_size * 3,
                                        model_opt.rnn_size,
                                        model_opt.dec_layers)
    else:
        speech_decoder = AudioDecoder(model_opt.rnn_type, model_opt.brnn,
                                      model_opt.dec_layers, model_opt.rnn_size,
                                      model_opt.global_attention,
                                      model_opt.coverage_attn,
                                      model_opt.context_gate,
                                      model_opt.copy_attn, model_opt.dropout)

    speech_model = SpeechModel(model.encoder, global_speech_encoder,
                               speech_decoder)

    if model_opt.param_init != 0.0:
        print('Initializing model parameters.')
        for p in text_model.parameters():
            p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        for p in speech_model.parameters():
            p.data.uniform_(-model_opt.param_init, model_opt.param_init)
    if hasattr(text_model.encoder, 'embeddings'):
        text_model.encoder.embeddings.load_pretrained_vectors(
            model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        text_model.load_state_dict(checkpoint['model'])
        if 'speech_model' in checkpoint and checkpoint['speech_model'] is not None:
            print('  Loading speech model parameters')
            speech_model.load_state_dict(checkpoint['speech_model'])

    # Add generator to model (this registers it as parameter of model).
    text_model.decoder.set_generator(generator)
    text_model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        text_model.cuda()
        speech_model.cuda()
    else:
        text_model.cpu()
        speech_model.cpu()

    return model, text_model, speech_model
Example #7
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the snapshot model.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        if model_opt.encoder_type == "double_encoder":
            inter_dict = fields['inter'].vocab
            inter_feature_dicts = ONMTDataset.collect_feature_dicts(fields)
            inter_embeddings = make_embeddings(model_opt,
                                               inter_dict,
                                               inter_feature_dicts,
                                               for_encoder=False,
                                               for_encoder_int=True)
            encoder = make_encoder(model_opt, src_embeddings, inter_embeddings)
        else:
            encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = []
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder_type == "double_encoder":
        model = DoubleEncNMTModel(encoder, decoder)
    else:
        model = NMTModel(encoder, decoder)

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint.
    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Add generator to model (attribute assignment registers it as a submodule).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example #8
def make_base_model(model_opt, fields, gpu, checkpoint=None, stage1=True):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    if stage1:
        src = "src1"
        tgt = "tgt1"
    else:
        src = "src2"
        tgt = "tgt2"
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields[src].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, src)
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)

        table_embeddings = make_embeddings(model_opt,
                                           src_dict,
                                           feature_dicts,
                                           discard_word=True)
        # reusing the same embedding weights
        print(table_embeddings.make_embedding[0])
        table_embeddings.word_lut.weight = src_embeddings.word_lut.weight
        table_embeddings.field_lut.weight = src_embeddings.field_lut.weight
        table_embeddings.type_lut.weight = src_embeddings.type_lut.weight
        table_embeddings.ha_lut.weight = src_embeddings.ha_lut.weight

        encoder = make_encoder(model_opt, (src_embeddings, table_embeddings),
                               stage1)

    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields[tgt].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, tgt)
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('`-share_vocab` must be set during '
                                 'preprocessing when using share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # NOTE: make decoder
    decoder = make_decoder(model_opt, tgt_embeddings, stage1)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if stage1:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt1"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        # NOTE: CopyGenerator
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt2"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
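
The initialization branch shared by most of these examples either fills every parameter uniformly in [-param_init, param_init] or applies Glorot (Xavier) initialization to the weight matrices. A standalone illustration on a toy layer; note that xavier_uniform is the pre-0.4 name of today's torch.nn.init.xavier_uniform_:

import torch.nn as nn
from torch.nn.init import xavier_uniform_

layer = nn.Linear(8, 8)

# Uniform fill, as in the param_init != 0.0 branch:
param_init = 0.1
for p in layer.parameters():
    p.data.uniform_(-param_init, param_init)

# Glorot init, as in the param_init_glorot branch; biases (p.dim() == 1)
# are skipped because xavier_uniform_ requires at least 2 dimensions:
for p in layer.parameters():
    if p.dim() > 1:
        xavier_uniform_(p)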
Example #9
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio", "hierarchical_text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "hierarchical_text":
        print("Modelcounstructor line:157 make hierarchical model")
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        sent_encoder = make_encoder(model_opt, src_embeddings)  
        if model_opt.hier_add_word_enc_input:
            encoder = make_encoder(model_opt, src_embeddings)
        else:
            encoder = None
        
        # The sub-context lengths are not sorted, so pack_padded_sequence
        # cannot be used on these encoders (left disabled):
        # sent_encoder.no_pack_padded_seq = True
        context_encoder = ContextEncoder(model_opt.rnn_type, model_opt.brnn,
                                         model_opt.enc_layers, model_opt.rnn_size,
                                         model_opt.dropout, model_opt.rnn_size,
                                         model_opt.bridge)
        # context_encoder.no_pack_padded_seq = True

    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('`-share_vocab` must be set during '
                                 'preprocessing when using share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    if model_opt.model_type == "hierarchical_text":
        model = HierarchicalModel(context_encoder, sent_encoder, decoder, normal_encoder=encoder)
    else:
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type
    
    # For deep summarization.
    model.obj_f = model_opt.obj_f
    print("model.obj_f:", model.obj_f)

    print("tgt vocab len:", len(fields["tgt"].vocab))
    print("tgt vocab freq len:", len(fields["tgt"].vocab.freqs))
#     input("MC line 222")
    # get idf value
#     words = [ fields["tgt"].vocab.itos[i] for i in range(len(fields["tgt"].vocab)) ]
#     def get_df(src_file_path, words):
#         words_df = [0] * len(words)
#         with open(src_file_path, 'r', encoding="utf-8") as src_file:
#             import collections
            
#             cnt = 0
#             for line in src_file:
#                 cnt += 1
#                 src_words = line.split()
#                 src_words_dict = { word:1 for word in src_words }
#                 for i in range(len(words)):
#                     if words[i] in src_words_dict:
#                         words_df[i] += 1
#         return words_df, cnt
    
#     words_df, cnt = get_df("article_data/article_src_train_src_500_tar_50.txt", words)
#     print("Modelconstructor line:216 complete get df information")
    
#     for i in range(len(fields["tgt"].vocab)):
#         from math import log
#         word = words[i]
#         if words_df[i] == 0:
#             words_df[i] = 1
# #         word_freq = fields["src"].vocab.freqs[word] if fields["src"].vocab.freqs[word] > 0 else 1
#         print("{}.{} : {}/{}  ".format(i, word, words_df[i], round(log(cnt/words_df[i]),2)), end='|| ')
#         if i % 10 == 0:
#             print()
    
#     input("ModelConstructor line:191 stop")

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
        if model_opt.model_type == "hierarhical_text" and hasattr(model.sent_encoder, 'embeddings'):
            model.sent_encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)                    
        elif model_opt.model_type == "text" and hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example #10
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        if model_opt.encoder2_type != 'none':
            src_dict2 = fields["src2"].vocab
            feature_dicts2 = onmt.io.collect_feature_vocabs(fields, 'src2')
            src_embeddings2 = make_embeddings(model_opt, src_dict2,
                                              feature_dicts2)

        if 'morph' in fields and hasattr(fields["morph"], 'vocab'):
            morph_dict = fields["morph"].vocab
            morph_embeddings = make_morph_embeddings(model_opt, morph_dict, [])
            encoder = make_encoder(model_opt, src_embeddings, morph_embeddings)
            encoder2 = make_encoder(
                model_opt,
                src_embeddings2,
                morph_embeddings,
                encoder_type='rnn'
            ) if model_opt.encoder2_type != 'none' else None
        else:
            encoder = make_encoder(model_opt,
                                   src_embeddings,
                                   encoder_type=model_opt.encoder_type
                                   )  # gcn features must go here
            if model_opt.encoder2_type == 'none':
                encoder2 = None
            else:
                if model_opt.encoder2_type == 'gcn':
                    encoder2 = make_encoder(
                        model_opt, src_embeddings,
                        encoder_type='gcn')  # gcn features must go here
                elif model_opt.encoder2_type == 'rnn':
                    encoder2 = make_encoder(model_opt,
                                            src_embeddings2,
                                            encoder_type='rnn')
                else:
                    raise ValueError("Not implemented yet.")
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('`-share_vocab` must be set during '
                                 'preprocessing when using share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder2_type == 'none':
        encoder2 = None
    if model_opt.encoder_type == 'gcn':
        if model_opt.use_dgl:
            model = NMTModelGCN_DGL(encoder, decoder, encoder2=encoder2)
        else:
            model = NMTModelGCN(encoder, decoder, encoder2=encoder2)
    else:
        model = NMTModel(encoder, decoder, encoder2=encoder2)
    model.model_type = model_opt.model_type  # text

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.encoder2, 'embeddings'):
            model.encoder2.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc2, model_opt.fix_word_vecs_enc2)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example #11
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.

    src_dict = fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
    src_embeddings = make_embeddings(model_opt,
                                     src_dict,
                                     feature_dicts,
                                     for_encoder=True)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('`-share_vocab` must be set during '
                                 'preprocessing when using share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    device = torch.device("cuda" if gpu else "cpu")
    all_docs = load_all_docs(model_opt, fields, device)

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder_type in ('BiAttEncoder', 'transformer'):
        model = TwoEncoderModel(encoder, decoder, all_docs, src_embeddings)
    elif model_opt.encoder_type == "PostEncoder":
        model = NMTModel(encoder, decoder)

    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example #12
def make_base_model(model_opt, fields, gpu, checkpoint=None, back_model=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)
    bk_tgt_embeddings = make_embeddings(model_opt,
                                        tgt_dict,
                                        feature_dicts,
                                        for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('`-share_vocab` must be set during '
                                 'preprocessing when using share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight
        bk_tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, bk_tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        bk_generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
            bk_generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        # bk_generator.load_state_dict(checkpoint['bk_generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # if back_model is not None:
    #     print('Loading back model parameters...')
    #     model_dict = model.state_dict()
    #     for key in model_dict.keys():
    #         if key.startswith('decoder.bk_rnn'):
    #             for load_key in back_model['model'].keys():
    #                 if key.split('.')[-1] == load_key.split('.')[-1] and load_key.startswith("decoder.rnn"):
    #                     print('From pretrained %s load %s' % (load_key, key))
    #                     model_dict.update({key: back_model['model'][load_key]})
    #     model.load_state_dict(model_dict)
    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    if model_opt.share_gen or model_opt.copy_attn:
        # With copy_attn no separate bk_generator was built above, so reuse it.
        model.bk_generator = generator
    else:
        model.bk_generator = bk_generator
    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Example #13
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('`-share_vocab` must be set during '
                                 'preprocessing when using share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # Make variational inference.
    model_opt.variable_src_dict = fields["src"].vocab
    if model_opt.use_gmm > 0:
        variationalInference = onmt.Models.VariationalInference(model_opt)
    else:
        variationalInference = None

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder, variationalInference, model_opt)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())  # works for PyTorch <= 0.1
        # nn.LogSoftmax(dim=-1))  # works for PyTorch >= 0.3/0.4
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
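
The PyTorch version comments in the example above concern only the dim argument of nn.LogSoftmax; for the (tokens, vocab)-shaped generator input, dim=1 and dim=-1 pick the same axis. A quick check:

import torch
import torch.nn as nn

scores = torch.randn(5, 32)            # (tokens, vocab)-shaped logits
log_probs = nn.LogSoftmax(dim=-1)(scores)

# Each row is a normalized log-distribution over the vocabulary axis.
assert torch.allclose(log_probs.exp().sum(dim=-1), torch.ones(5))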
Example #14
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    try:
        mmod_generator = 'generator' in model_opt.multimodal_model_type
        mmod_bank = 'bank' in model_opt.multimodal_model_type
        mmod_imgw = 'imgw' in model_opt.multimodal_model_type
        mmod_dcap = 'dcap' in model_opt.multimodal_model_type
        mmod_model = mmod_bank or mmod_imgw or mmod_dcap
    except AttributeError:
        mmod_generator = False
        mmod_bank = False
        mmod_imgw = False
        mmod_model = False
        mmod_dcap = False
    try:
        mmod_generator_add = model_opt.mmod_generator_add
    except AttributeError:
        mmod_generator_add = 0.0
    try:
        mmod_use_hidden = model_opt.mmod_use_hidden
    except AttributeError:
        mmod_use_hidden = False

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings, mmod_imgw=mmod_imgw)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(
        model_opt, tgt_dict, feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, mmod_dcap)

    # Make Model
    if mmod_model:
        if mmod_bank:
            bridge = multimodal.MultiModalMemoryBankGate(
                model_opt.rnn_size, model_opt.img_feat_dim, add=mmod_generator_add)
        else:
            bridge = None
        model = multimodal.MultiModalNMTModel(encoder, bridge, decoder, imgw=mmod_imgw,
                                              num_capsules=model_opt.num_capsules, num_regions=model_opt.num_regions,
                                              dcap=mmod_dcap)
    else:
        # Make NMTModel(= encoder + decoder).
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)
    else:
        print('creating a Sequential generator')
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    if mmod_generator:
        print('wrapping in a MultiModalGenerator')
        generator = onmt.modules.multimodal.MultiModalGenerator(
            generator, model_opt.img_feat_dim,
            add=mmod_generator_add, use_hidden=mmod_use_hidden)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
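The try/except AttributeError blocks at the top of this variant read options that checkpoints saved by older code may not carry. getattr with a default expresses the same fallback more compactly; a minimal sketch (the helper name opt_get is ours):

def opt_get(model_opt, name, default):
    # Options absent from older checkpoints fall back to the default.
    return getattr(model_opt, name, default)

mmod_generator_add = opt_get(model_opt, 'mmod_generator_add', 0.0)
mmod_use_hidden = opt_get(model_opt, 'mmod_use_hidden', False)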
Beispiel #15
0
def make_base_model(model_opt,
                    fields,
                    gpu,
                    checkpoint=None,
                    init_encoder=False,
                    rev_checkpoint=None,
                    top_layer=100):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)
    if model_opt.share_rnn:
        if model_opt.input_feed == 1:
            raise AssertionError('Cannot share encoder and decoder weights '
                                 'when using input feed in decoder')
        if model_opt.src_word_vec_size != model_opt.tgt_word_vec_size:
            raise AssertionError('Cannot share encoder and decoder weights '
                                 'if embeddings are different sizes')
        encoder.rnn = decoder.rnn

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None and not init_encoder:
        print('Loading model parameters from checkpoint.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
        if init_encoder:
            model_dict = checkpoint['model']
            encoder_dict = {}

            model_dict_keys = []
            for key in model_dict.keys():
                if key[:7] == 'encoder':
                    if key[-7:] == 'reverse':
                        if int(key[-9]) > top_layer:
                            continue
                    else:
                        if key[8:18] != 'embeddings' and int(
                                key[-1]) > top_layer:
                            continue
                    model_dict_keys.append(key)
            print(model_dict_keys)

            # Load encoder parameters
            new_model_dict = model.state_dict()
            for key, value in model_dict.items():
                if key in model_dict_keys:
                    new_model_dict[key] = value
            """
            if rev_checkpoint is not None:
                rev_model_dict = rev_checkpoint['model']
                if key[:7] == 'encoder' and key[8:18] != 'embeddings':
                    new_model_dict[key+'_reverse'] = value
            """
            model.load_state_dict(new_model_dict)

            # Freeze encoder parameters
            for name, param in model.named_parameters():
                if name in model_dict_keys:
                    param.requires_grad = False

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
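Stripped of the layer-index bookkeeping, the init_encoder path above reduces to copying the encoder.* entries from a checkpoint into a fresh state dict and freezing them. A generic sketch of that pattern (the function name load_and_freeze_encoder is ours):

def load_and_freeze_encoder(model, checkpoint_state):
    # Overwrite only the encoder entries of the freshly initialized state dict.
    own_state = model.state_dict()
    enc_keys = [k for k in checkpoint_state if k.startswith('encoder.')]
    for k in enc_keys:
        own_state[k] = checkpoint_state[k]
    model.load_state_dict(own_state)
    # Freeze the copied parameters so only the decoder (and generator) train.
    for name, param in model.named_parameters():
        if name in enc_keys:
            param.requires_grad = False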
Beispiel #16
0
def run_encoder_to_decoder_state(self, src, second_src, lengths):
    # second_src is ignored here: delegate to the single-source NMTModel path.
    return NMTModel.run_encoder_to_decoder_state(self, src, lengths)
Beispiel #17
0
def make_base_model(model_opt, fields, gpu, checkpoint=None, spelling=None, tgt_char_field=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    if model_opt.use_char_composition == 'None':
        word_representer = None
    else:
        word_representer = WordRepresenter(spelling,
                                           len(tgt_char_field.vocab),
                                           tgt_char_field.vocab.stoi[onmt.io.PAD_WORD],
                                           model_opt.tgt_word_vec_size,
                                           char_composition=model_opt.use_char_composition)
        if gpu:
            word_representer.init_cuda()

    if model_opt.use_char_composition == 'None':
        tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                         feature_dicts, for_encoder=False)
    else:
        tgt_embeddings = VarEmbedding(word_representer, model_opt.tgt_word_vec_size)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight
    #TODO: replace the tgt_embeddings obj with our VarEmbeddings
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        if model_opt.use_char_composition == 'None':
            generator = nn.Sequential(
                nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
                nn.LogSoftmax(dim=-1))
        else:
            # TODO: make a new generator with VarLinear
            generator = nn.Sequential(
                VarLinear(word_representer),
                nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings') and model_opt.use_char_composition == 'None':
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
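Several of these variants tie the generator's output projection to the decoder embedding (generator[0].weight = decoder.embeddings.word_lut.weight). The assignment works because nn.Linear(rnn_size, vocab) and nn.Embedding(vocab, rnn_size) both store a weight of shape (vocab, rnn_size); a standalone sketch with illustrative sizes:

import torch.nn as nn

vocab_size, rnn_size = 10000, 512
embedding = nn.Embedding(vocab_size, rnn_size)   # weight: (vocab_size, rnn_size)
projection = nn.Linear(rnn_size, vocab_size)     # weight: (vocab_size, rnn_size)
projection.weight = embedding.weight             # one shared Parameter, trained once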
Beispiel #18
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """

    # Make encoder.
    src_dict = fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    if model_opt.decoder_model == 'Rev' and model_opt.encoder_model == 'Rev':
        model = RevNMTModel(encoder,
                            decoder,
                            fields['tgt'].vocab,
                            opt=model_opt)
    else:
        model = NMTModel(encoder, decoder, opt=model_opt)

    model.model_type = model_opt.model_type

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Beispiel #19
0
def make_base_model(model_opt,
                    fields,
                    gpu,
                    checkpoint=None,
                    stage1=True,
                    basic_enc_dec=False):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """
    if stage1 and not basic_enc_dec:
        assert False
        src = "src1"
        tgt = "tgt1"
    else:
        src = "src2"
        tgt = "tgt2"
    src_hist = "src1_hist" if (basic_enc_dec or stage1) else None
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields[src].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, src)

        src_embeddings = make_embeddings(model_opt,
                                         src_dict,
                                         feature_dicts,
                                         hist_dict=fields[src_hist].vocab,
                                         use_hier_hist=True)

        encoder = make_encoder(model_opt, src_embeddings, stage1,
                               basic_enc_dec)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields[tgt].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, tgt)
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings, stage1
                           and not basic_enc_dec, basic_enc_dec)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    generator = CopyGenerator(model_opt.rnn_size, fields["tgt2"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        # print("model load stats ...")
        # new_model_keys = set(model.state_dict().keys())
        # old_model_keys = set(checkpoint['model'].keys())
        # print("missing keys when load...")
        # print(new_model_keys - old_model_keys)
        # print("abundant keys when load...")
        # print(old_model_keys - new_model_keys)

        # print("gen load stats...")
        # new_gen_keys = set(generator.state_dict().keys())
        # old_gen_keys = set(checkpoint['generator'].keys())
        # print("missing keys when load...")
        # print(new_gen_keys - old_gen_keys)
        # print("abundant keys when load...")
        # print(old_gen_keys - new_gen_keys)
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Beispiel #20
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    def _init_model():
      if checkpoint is not None:
          print('Loading model parameters.')
          load_state_dict(model, checkpoint['model'])
          if (model.encoder.ent_attn.linear_query.weight is not None and
                  'encoder.ent_attn.linear_query.weight' not in checkpoint['model']):
              init_val = model_opt.param_init
              model.encoder.ent_attn.linear_query.weight.data.uniform_(-init_val, init_val)
              model.encoder.ent_attn.linear_out.bias.data.uniform_(-init_val, init_val)
              model.encoder.ent_attn.linear_query.bias.data.uniform_(-init_val, init_val)
              model.encoder.ent_attn.linear_context.weight.data.uniform_(-init_val, init_val)
              model.encoder.ent_attn.v.weight.data.uniform_(-init_val, init_val)
              model.encoder.ent_attn.linear_out.weight.data.uniform_(-init_val, init_val)
      else:
          if model_opt.param_init != 0.0:
              print('Initializing model parameters.')
              for p in model.parameters():
                  p.data.uniform_(-model_opt.param_init, model_opt.param_init)
          if model_opt.param_init_glorot:
              for p in model.parameters():
                  if p.dim() > 1:
                      xavier_uniform(p)

          if hasattr(model.encoder, 'embeddings'):
              model.encoder.embeddings.load_pretrained_vectors(
                      model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
          if hasattr(model.decoder, 'embeddings'):
              model.decoder.embeddings.load_pretrained_vectors(
                      model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Load the gen states from checkpoint or initialize them.
    def _fill_generator(gen, name):
      if checkpoint is not None and name in checkpoint:
          print('Loading gen parameters.')
          gen.load_state_dict(checkpoint[name])
      else:
          if model_opt.param_init != 0.0:
              print('Initializing gen parameters.')
              for p in gen.parameters():
                  p.data.uniform_(-model_opt.param_init, model_opt.param_init)
          if model_opt.param_init_glorot:
              for p in gen.parameters():
                  if p.dim() > 1:
                      xavier_uniform(p)

    _init_model()
    _fill_generator(generator, 'generator')

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
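_init_model above re-initializes only the ent_attn parameters when the checkpoint predates that submodule. In recent PyTorch (1.0+), nn.Module.apply offers a general way to run an initializer over one submodule tree; a minimal sketch, assuming the same uniform range as model_opt.param_init:

def _uniform_init(module):
    # recurse=False so apply() visits each parameter exactly once.
    for p in module.parameters(recurse=False):
        p.data.uniform_(-0.1, 0.1)

model.encoder.ent_attn.apply(_uniform_init)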
Beispiel #21
0
def make_base_model(model_opt, src_dict, tgt_dict, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    src_embeddings = make_embeddings(model_opt, src_dict, for_encoder=True)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, for_encoder=False)
    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # if model_opt.pre_word_vecs_enc is not None:
    #     print("Loading word vectors for encoder")
    #     pretrained = torch.load(model_opt.pre_word_vecs_enc)
    #     src_embeddings.word_lut.weight.data.copy_(pretrained)

    # if model_opt.pre_word_vecs_dec is not None:
    #     print("Loading word vectors for decoder")
    #     pretrained = torch.load(model_opt.pre_word_vecs_dec)
    #     tgt_embeddings.word_lut.weight.data.copy_(pretrained)

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(nn.Linear(model_opt.rnn_size, len(tgt_dict)),
                                  nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, tgt_dict)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Beispiel #22
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))
    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)


    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)
    if model_opt.image_feat_type == 'local':
        # use the local image features "as is": the encoder only reshapes them
        # (feat_size is mapped to rnn_size through a single linear layer)
        encoder_img = make_encoder_image_local_features(model_opt)
    elif model_opt.image_feat_type == 'global':
        # transform global image features before using them
        encoder_img = make_encoder_image_global_features(model_opt)
    else:
        encoder_img = None

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder, encoder_img)

    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Beispiel #23
0
def make_auto_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    src_fields = fields[0]
    tgt_fields = fields[1]

    # Make encoder.
    src_dict = src_fields["src"].vocab
    feature_dicts = onmt.IO.collect_feature_dicts(src_fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
    src_encoder = make_encoder(model_opt, src_embeddings)
    src_embeddings = make_embeddings(model_opt,
                                     src_dict,
                                     feature_dicts,
                                     for_encoder=False)
    src_decoder = make_decoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = tgt_fields["src"].vocab
    feature_dicts = onmt.IO.collect_feature_dicts(tgt_fields, 'src')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts)
    tgt_encoder = make_encoder(model_opt, tgt_embeddings)
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)
    tgt_decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    src_auto_model = NMTModel(src_encoder, src_decoder)
    tgt_auto_model = NMTModel(tgt_encoder, tgt_decoder)

    # Make Generator.
    src_generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, len(src_fields["tgt"].vocab)),
        nn.LogSoftmax())
    tgt_generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, len(tgt_fields["tgt"].vocab)),
        nn.LogSoftmax())

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # JD TODO: FIX THIS!
        print('Loading model parameters.')
        src_auto_model.load_state_dict(checkpoint['src_auto_model'])
        tgt_auto_model.load_state_dict(checkpoint['tgt_auto_model'])
        src_generator.load_state_dict(checkpoint['src_generator'])
        tgt_generator.load_state_dict(checkpoint['tgt_generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in src_encoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in src_decoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in tgt_encoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in tgt_decoder.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)

            src_encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
            src_decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
            tgt_encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
            tgt_decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

            for p in src_generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in tgt_generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)

    # Add generator to model (this registers it as parameter of model).
    src_auto_model.generator = src_generator
    tgt_auto_model.generator = tgt_generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        src_auto_model.cuda()
        tgt_auto_model.cuda()
    else:
        src_auto_model.cpu()
        tgt_auto_model.cpu()

    return src_auto_model, tgt_auto_model
Beispiel #24
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        print('Collecting feature vocabularies...')
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        print('Building source embeddings...')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        print('Building an encoder...')
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')

    if model_opt.char_compos_type == 'none':

        tgt_embeddings = make_embeddings(model_opt,
                                         tgt_dict,
                                         feature_dicts,
                                         for_encoder=False)
        print('Using standard embeddings')

    else:

        spells = getVocabSpell(tgt_dict, gpu)
        embedding_dim = model_opt.tgt_word_vec_size

        chanQty = model_opt.char_comp_cnn_chan_qty
        dropout = model_opt.dropout
        numLayers = model_opt.char_comp_rnn_layer

        configs = {}

        for compType in model_opt.char_compos_type.split('-'):

            if compType == 'brnn':
                configs['brnn'] = {
                    'numLayers': numLayers,
                    'embedSize': embedding_dim
                }
            elif compType == 'rnn':
                configs['rnn'] = {
                    'numLayers': numLayers,
                    'embedSize': embedding_dim
                }
            elif compType == 'cnn':
                configs['cnn'] = {
                    'chanQty': chanQty,
                    'embedSize': embedding_dim
                }
            elif compType == 'wembed':
                configs['wembed'] = {'embedSize': embedding_dim}
            else:
                raise Exception('Invalid composition type: ' + compType)

        tgt_embeddings = Char2VecComposite(
            spells,
            configs,
            charEmbedSize=model_opt.char_embed_size,
            dropout=dropout)

        print('Using char-level composition embeddings of type %s' %
              model_opt.char_compos_type)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            print('Loading pre-trained source vectors: %s/%s' %
                  (model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc))
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder,
                   'embeddings') and model_opt.char_compos_type == 'none':
            print('Loading pre-trained target vectors: %s/%s' %
                  (model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec))
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        print('Moving model to CUDA')
        model.cuda()
    else:
        model.cpu()

    return model
Beispiel #25
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Beispiel #26
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the training phase, or a snapshot
                    resumed from a stopped training run.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')

        # print("checkpoint")
        # for name, param in sorted(checkpoint["model"].items()):
        #     print(f"{name}", param.size())
        # print()
        # print("model itself")
        # for name, param in sorted(model.state_dict().items()):
        #     print(f"{name}", param.size())
        # print()
        state = model.state_dict()
        if state.keys() == checkpoint["model"].keys():
            model.load_state_dict(checkpoint["model"])
        else:
            state = model.state_dict()
            state.update(checkpoint["model"])
            model.load_state_dict(state)
            for name, param in model.named_parameters():
                if "embedding" in name:
                    if model_opt.param_init != 0.0:
                        param.data.uniform_(-model_opt.param_init,
                                            model_opt.param_init)
                    if model_opt.param_init_glorot:
                        if param.dim() > 1:
                            xavier_uniform(param)

        # print("checkpoint")
        # for name, param in sorted(checkpoint["generator"].items()):
        #     print(f"{name}", param.size())
        # print()
        # print("model itself generator")
        # for name, param in sorted(generator.state_dict().items()):
        #     print(f"{name}", param.size())

        state = generator.state_dict()
        if state.keys() == checkpoint["generator"].keys():
            generator.load_state_dict(checkpoint["generator"])
        else:
            state = generator.state_dict()
            state.update(checkpoint["generator"])
            generator.load_state_dict(state)
            for name, param in generator.named_parameters():
                if "linear.bias" in name or "linear.weight" in name:
                    if model_opt.param_init != 0.0:
                        param.data.uniform_(-model_opt.param_init,
                                            model_opt.param_init)
                    if model_opt.param_init_glorot:
                        if param.dim() > 1:
                            xavier_uniform(param)
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
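
The example above merges a partially matching checkpoint into the model's own state dict and re-initializes whatever the checkpoint does not cover. Below is a minimal, self-contained sketch of that pattern; the `Toy` module and the in-memory `checkpoint` dict are hypothetical stand-ins, not part of the original code.

import torch
import torch.nn as nn

class Toy(nn.Module):
    def __init__(self):
        super(Toy, self).__init__()
        self.embedding = nn.Embedding(10, 4)
        self.linear = nn.Linear(4, 2)

model = Toy()
# Pretend checkpoint that covers only the linear layer.
checkpoint = {"model": {"linear.weight": torch.zeros(2, 4),
                        "linear.bias": torch.zeros(2)}}

state = model.state_dict()
if state.keys() == checkpoint["model"].keys():
    model.load_state_dict(checkpoint["model"])
else:
    state.update(checkpoint["model"])      # keep keys the checkpoint lacks
    model.load_state_dict(state)
    for name, param in model.named_parameters():
        if "embedding" in name:            # re-initialize uncovered parameters
            param.data.uniform_(-0.1, 0.1)
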
def make_base_model(model_opt, fields, gpu, checkpoint=None, train_part="all"):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        train_part: which part of the model to train ("all" or "context").
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)
    context = make_context(model_opt, tgt_dict)

    # Make NMTModel(= encoder + decoder).
    if model_opt.RISK_ratio > 0.0:
        scorer = onmt.translate.GNMTGlobalScorer(model_opt.alpha,
                                                 model_opt.beta,
                                                 model_opt.coverage_penalty,
                                                 model_opt.length_penalty)
        model = NMTModel(encoder,
                         decoder,
                         context,
                         context_type=model_opt.context_type,
                         tgt_vocab=fields['tgt'].vocab,
                         beam_size=model_opt.beam_size,
                         n_best=model_opt.n_best,
                         gpu=gpu,
                         scorer=scorer,
                         min_length=model_opt.min_length,
                         max_length=model_opt.max_length,
                         stepwise_penalty=model_opt.stepwise_penalty,
                         block_ngram_repeat=model_opt.block_ngram_repeat,
                         ignore_when_blocking=model_opt.ignore_when_blocking,
                         copy_attn=model_opt.copy_attn,
                         context_size=model_opt.context_size)
    else:
        model = NMTModel(encoder,
                         decoder,
                         context,
                         context_type=model_opt.context_type)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model_dict = checkpoint['model']
        if train_part == "context":
            model_dict = model.state_dict()
            if 'join' in model_opt.context_type:
                pretrained_dict = {}
                for k, v in checkpoint['model'].items():
                    if k in model_dict:
                        if 'doc_context' in k:
                            k = k.replace('doc_context', 'doc_context.0')
                        pretrained_dict[k] = v
            else:
                pretrained_dict = {
                    k: v
                    for k, v in checkpoint['model'].items()
                    if k in model_dict and 'doc_context' not in k
                }
            model_dict.update(pretrained_dict)

        model.load_state_dict(model_dict, strict=False)
        generator.load_state_dict(checkpoint['generator'])
        if train_part == "context":
            print("Freezing parameters of main model")
            for param in model.parameters():
                param.requires_grad = False
            for param in generator.parameters():
                param.requires_grad = False
            print("Unfreezing parameters of context")
            for param in model.doc_context.parameters():
                param.requires_grad = True
                if model_opt.param_init != 0.0:
                    param.data.uniform_(-model_opt.param_init,
                                        model_opt.param_init)
                if model_opt.param_init_glorot:
                    if param.dim() > 1:
                        xavier_uniform(param)
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
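
The `train_part == "context"` branch above freezes the whole model and then re-enables gradients only for the context sub-module. Here is a small sketch of that freeze/unfreeze pattern on a hypothetical two-part module; note the attribute is `requires_grad` — assigning to a misspelled name would be silently ignored.

import torch.nn as nn

net = nn.ModuleDict({"body": nn.Linear(8, 8), "doc_context": nn.Linear(8, 8)})
for param in net.parameters():
    param.requires_grad = False            # freeze everything
for param in net["doc_context"].parameters():
    param.requires_grad = True             # train only the context part

trainable = [name for name, p in net.named_parameters() if p.requires_grad]
print(trainable)  # ['doc_context.weight', 'doc_context.bias']
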
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make Generator.
    use_multimodal_model = model_opt.multimodal_type is not None
    if use_multimodal_model and 'gm' in model_opt.multimodal_type:
        generator_in_size = model_opt.rnn_size + model_opt.second_dim
    else:
        generator_in_size = model_opt.rnn_size
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(generator_in_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(generator_in_size, fields["tgt"].vocab)

    if use_multimodal_model:
        second_dim_in = model_opt.second_dim_in
        second_dim = model_opt.second_dim
        mmm_class = onmt.modules.MultiModalModel.multimodal_model_class_by_key(
            model_opt.multimodal_type)
        model = mmm_class(
            encoder=encoder,
            second_encoder=nn.Sequential(nn.Linear(second_dim_in, second_dim),
                                         nn.Sigmoid()),
            second_dim=second_dim,
            decoder=decoder,
            generator=None  # Assigned later
        )
    else:
        # Make NMTModel(= encoder + decoder).
        model = NMTModel(encoder, decoder)
        model.model_type = model_opt.model_type

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
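
In the variant above, `share_decoder_embeddings` ties the generator's output projection to the decoder's embedding matrix, so both point at a single parameter tensor; this requires the projection's input size to equal the embedding dimension. A minimal sketch with made-up sizes:

import torch.nn as nn

vocab_size, rnn_size = 100, 16
tgt_embeddings = nn.Embedding(vocab_size, rnn_size)
generator = nn.Sequential(nn.Linear(rnn_size, vocab_size),
                          nn.LogSoftmax(dim=-1))
generator[0].weight = tgt_embeddings.weight   # one shared Parameter
assert generator[0].weight is tgt_embeddings.weight
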
Example #29
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder_type == "trigramrnn" and model_opt.decoder_type == "rnn":
        decoder = make_decoder(model_opt, tgt_embeddings)
        model = NMTSourceTrigramModel(encoder, decoder)
    elif model_opt.encoder_type == "brnn" and model_opt.decoder_type == "charrnn":
        [decoder1, decoder2] = make_decoder(model_opt, tgt_embeddings)
        model = NMTTargetCharModel(encoder, decoder1, decoder2)
    elif model_opt.encoder_type == "trigramrnn" and model_opt.decoder_type == "charrnn":
        [decoder1, decoder2] = make_decoder(model_opt, tgt_embeddings)
        model = CharNMTModel(encoder, decoder1, decoder2)
    else:
        decoder = make_decoder(model_opt, tgt_embeddings)
        model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Initializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            if model_opt.pre_encoder:
                pretrained = torch.load(model_opt.pre_encoder)
                encoder_dict = {}
                for key in pretrained['model']:
                    if key.startswith('encoder'):
                        encoder_dict[key] = pretrained['model'][key]
                model_dict = model.state_dict()
                model_dict.update(encoder_dict)
                model.load_state_dict(model_dict)

                # Freeze the transferred encoder parameters.
                for p in model.encoder.parameters():
                    p.requires_grad = False

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if model_opt.decoder_type == "charrnn":
            if hasattr(model.decoder1, 'embeddings'):
                model.decoder1.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
        else:
            if hasattr(model.decoder, 'embeddings'):
                model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
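
The `pre_encoder` branch above transfers only the `encoder.`-prefixed keys from a pretrained checkpoint and then freezes them. A self-contained sketch of that key-filtering pattern; `TinySeq2Seq` and the in-memory `pretrained` dict stand in for the real model and for `torch.load(...)`.

import torch.nn as nn

class TinySeq2Seq(nn.Module):
    def __init__(self):
        super(TinySeq2Seq, self).__init__()
        self.encoder = nn.Linear(4, 4)
        self.decoder = nn.Linear(4, 4)

model = TinySeq2Seq()
pretrained = {"model": TinySeq2Seq().state_dict()}  # stand-in for torch.load(...)

encoder_dict = {k: v for k, v in pretrained["model"].items()
                if k.startswith("encoder")}
model_dict = model.state_dict()
model_dict.update(encoder_dict)           # overwrite only encoder weights
model.load_state_dict(model_dict)

for p in model.encoder.parameters():      # freeze the transferred encoder
    p.requires_grad = False
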
def make_base_model(opt, model_opt, fields, checkpoint=None):
    """
    Args:
        opt: the option in current environment.
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        checkpoint: the snapshot model.
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make Encoder.
    if model_opt.model_type == "text":
        src_vocab = fields["src"].vocab
        feature_dicts = ONMTDataset.collect_feature_dicts(fields)
        feats_padding_idx = [
            feat_dict.stoi[onmt.IO.PAD_WORD] for feat_dict in feature_dicts
        ]
        num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]
        src_embeddings = make_embeddings(
            model_opt,
            src_vocab.stoi[onmt.IO.PAD_WORD],
            feats_padding_idx,
            len(src_vocab),
            for_encoder=True,
            num_feat_embeddings=num_feat_embeddings)

        encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)

    # Make Decoder.
    tgt_vocab = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible
    feats_padding_idx = []
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_vocab.stoi[onmt.IO.PAD_WORD],
                                     feats_padding_idx,
                                     len(tgt_vocab),
                                     for_encoder=False)
    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= Encoder + Decoder).
    model = NMTModel(encoder, decoder)

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint.
    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Make the whole model leverage GPU if indicated to do so.
    if hasattr(opt, 'gpuid'):
        cuda = len(opt.gpuid) >= 1
    elif hasattr(opt, 'gpu'):
        cuda = opt.gpu > -1
    else:
        cuda = False

    if cuda:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()
    model.generator = generator

    return model
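
This last variant resolves the CUDA decision from whichever attribute the calling script happens to provide: a `gpuid` list (training) or a single `gpu` index (translation), defaulting to CPU when neither exists. A small sketch of that resolution as a helper; `wants_cuda` is a hypothetical name, and `Namespace` stands in for the real option object.

from argparse import Namespace

def wants_cuda(opt):
    # Older training scripts pass a list of GPU ids.
    if hasattr(opt, 'gpuid'):
        return len(opt.gpuid) >= 1
    # Translation scripts pass a single index, -1 meaning CPU.
    if hasattr(opt, 'gpu'):
        return opt.gpu > -1
    return False

assert wants_cuda(Namespace(gpuid=[0]))
assert not wants_cuda(Namespace(gpu=-1))
assert not wants_cuda(Namespace())
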