Example #1
0
def build_reranker_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the re-ranker model, restore or initialize its parameters and
    move it to the requested device.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the ReRankerModel.
    Raises:
        AssertionError: if `model_opt.model_type` is not "text".
    """
    # Raise explicitly rather than with `assert`, so the validation is not
    # stripped when Python runs with -O.
    if model_opt.model_type not in ["text"]:
        raise AssertionError(
            "Unsupported model type %s" % (model_opt.model_type))

    # Build the re-ranker on top of the source embeddings.
    src_dict = fields["src"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
    src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
    reranker = build_reranker(model_opt, src_embeddings)
    reranker = onmt.models.ReRankerModel(reranker)

    device = torch.device("cuda" if gpu else "cpu")
    reranker.model_type = model_opt.model_type

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        reranker.load_state_dict(checkpoint['reranker'])
    else:
        if model_opt.param_init != 0.0:
            for p in reranker.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # A single Glorot pass: a second identical pass would only
            # overwrite the values drawn by the first one.
            for p in reranker.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        # Pre-trained vectors are loaded after the random initialization so
        # they are not overwritten by it.
        if hasattr(reranker.reranker, 'embeddings'):
            reranker.reranker.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)

    reranker.to(device)

    return reranker
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build a DGModel (encoder + decoder + answer decoder) and attach its two
    generators.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the DGModel, with `generator` and `ans_generator` attached.
    Raises:
        AssertionError: if `share_embeddings` is set while the src vocab
            differs from the tgt or answer vocab.
    """
    # Build encoder.
    src_dict = fields["src"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
    src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)

    encoder = DistractorEncoder(
        model_opt.rnn_type, model_opt.word_encoder_type,
        model_opt.sent_encoder_type, model_opt.question_init_type,
        model_opt.word_encoder_layers, model_opt.sent_encoder_layers,
        model_opt.question_init_layers, model_opt.rnn_size, model_opt.dropout,
        src_embeddings, model_opt.lambda_question, model_opt.lambda_answer)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    bidirectional_encoder = model_opt.question_init_type == 'brnn'
    decoder = HierDecoder(model_opt.rnn_type, bidirectional_encoder,
                          model_opt.dec_layers, model_opt.rnn_size,
                          model_opt.global_attention, model_opt.dropout,
                          tgt_embeddings)

    # Build ans_decoder.
    ans_dict = fields["answer"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'answer')
    ans_embeddings = build_embeddings(model_opt,
                                      ans_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/answer vocab should be the same if `-share_vocab` is specified.
        if src_dict != ans_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        ans_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    ans_bidirectional_encoder = model_opt.question_init_type == 'brnn'
    ans_decoder = HierDecoderAns(model_opt.rnn_type, ans_bidirectional_encoder,
                                 model_opt.dec_layers, model_opt.rnn_size,
                                 model_opt.global_attention, model_opt.dropout,
                                 ans_embeddings)

    # Build DGModel(= encoder + decoder + ans_decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = DGModel(encoder, decoder, ans_decoder)

    # Build Generator.
    gen_func = nn.LogSoftmax(dim=-1)
    generator = nn.Sequential(
        nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)), gen_func)

    # Build AnsGenerator.
    ans_gen_func = nn.LogSoftmax(dim=-1)
    ans_generator = nn.Sequential(
        nn.Linear(model_opt.dec_rnn_size, len(fields["answer"].vocab)),
        ans_gen_func)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
        ans_generator.load_state_dict(checkpoint['ans_generator'],
                                      strict=False)
    else:
        # The generators are not attached to `model` yet, so they have to be
        # initialized explicitly. `ans_generator` is included so it follows
        # the same scheme as `generator`.
        init_targets = (model, generator, ans_generator)
        if model_opt.param_init != 0.0:
            for module in init_targets:
                for p in module.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
        if model_opt.param_init_glorot:
            for module in init_targets:
                for p in module.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        # Pre-trained vectors are loaded after the random initialization so
        # they are not overwritten by it.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
        if hasattr(model.ans_decoder, 'embeddings'):
            model.ans_decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generators to model (this registers them as parameters of model).
    model.generator = generator
    model.ans_generator = ans_generator
    model.to(device)

    return model
Example #3
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build an NMTModel (encoder + decoder + optional reference encoder) and
    attach its generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    Raises:
        AssertionError: if the model type is unsupported or has no encoder
            implementation here, if a text model has different encoder and
            decoder rnn sizes, or if `share_embeddings` is set while the
            src and tgt vocabs differ.
    """
    # Raise explicitly rather than with `assert`, so the validation is not
    # stripped when Python runs with -O.
    if model_opt.model_type not in ["text", "img", "audio"]:
        raise AssertionError(
            "Unsupported model type %s" % (model_opt.model_type))

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size
    # This check runs outside the branch above: inside it both sizes have
    # just been set to the same value, so the comparison could never fail.
    if model_opt.model_type == 'text' and \
       model_opt.enc_rnn_size != model_opt.dec_rnn_size:
        raise AssertionError("""We do not support different encoder and
                                     decoder rnn sizes for translation now.""")

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    else:
        # Only the text encoder is built by this function; fail with a clear
        # message instead of an unbound `encoder` further down.
        raise AssertionError(
            "No encoder is built for model type %s" % (model_opt.model_type))

    # Build the optional reference encoder.
    if model_opt.refer:
        ref_dict = fields["ref"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'ref')
        ref_embeddings = build_embeddings(model_opt, ref_dict, feature_dicts)
        refer = build_encoder(model_opt, ref_embeddings)
    else:
        refer = None

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder + refer).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder, refer)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
            gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using customed layernorm:
        # the old `a_2` / `b_2` parameter names are mapped to `weight` /
        # `bias`.
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        # NOTE: the renamed state dict is written back into `checkpoint`.
        checkpoint['model'] = \
            {fix_key(k): v for (k, v) in checkpoint['model'].items()}
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        # Pre-trained vectors are loaded after the random initialization so
        # they are not overwritten by it.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example #4
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Assemble a MemModel from an encoder and a decoder, attach a generator,
    then restore or initialize the parameters.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the MemModel.
    Raises:
        AssertionError: if `share_embeddings` is set while the src and tgt
            vocabs differ.
    """
    # Encoder side: vocab -> embeddings -> encoder.
    src_vocab = fields["src"].vocab
    src_feats = inputters.collect_feature_vocabs(fields, 'src')
    enc_emb = build_embeddings(model_opt, src_vocab, src_feats)
    encoder = build_encoder(model_opt, enc_emb)

    # Decoder side: vocab -> embeddings (-> optional tying) -> decoder.
    tgt_vocab = fields["tgt"].vocab
    tgt_feats = inputters.collect_feature_vocabs(fields, 'tgt')
    dec_emb = build_embeddings(
        model_opt, tgt_vocab, tgt_feats, for_encoder=False)

    if model_opt.share_embeddings:
        # Tying the lookup tables requires a vocab shared at preprocess time.
        if src_vocab != tgt_vocab:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        dec_emb.word_lut.weight = enc_emb.word_lut.weight

    decoder = build_decoder(model_opt, dec_emb)
    model = onmt.models.MemModel(encoder, decoder)

    # Generator: copy-style generators when copy attention is on, otherwise
    # a linear projection followed by a log-probability activation.
    if model_opt.copy_attn:
        if model_opt.coref_vocab or model_opt.coref_attn:
            generator = CorefGenerator(model_opt.dec_rnn_size,
                                       fields["tgt"].vocab,
                                       fields["coref_tgt"].vocab)
        else:
            generator = CopyGenerator(model_opt.dec_rnn_size,
                                      fields["tgt"].vocab)
    else:
        if model_opt.generator_function == "sparsemax":
            activation = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            activation = nn.LogSoftmax(dim=-1)
        projection = nn.Linear(model_opt.dec_rnn_size,
                               len(fields["tgt"].vocab))
        generator = nn.Sequential(projection, activation)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight

    if checkpoint is None:
        # Fresh start: the generator is not attached to the model yet, so
        # it is initialized explicitly, after the model.
        modules = (model, generator)
        if model_opt.param_init != 0.0:
            for module in modules:
                for weight in module.parameters():
                    weight.data.uniform_(-model_opt.param_init,
                                         model_opt.param_init)
        if model_opt.param_init_glorot:
            for module in modules:
                for weight in module.parameters():
                    if weight.dim() > 1:
                        xavier_uniform_(weight)

        # Pre-trained vectors go in last so the passes above keep them.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
    else:
        # Resume: restore both state dicts non-strictly.
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(torch.device("cuda" if gpu else "cpu"))

    return model
Example #5
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Construct an NMTModel for a text, image or audio source, attach its
    generator, and restore or initialize the parameters.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    Raises:
        AssertionError: if the model type is unsupported, or if
            `share_embeddings` is set while the src and tgt vocabs differ.
    """
    kind = model_opt.model_type
    assert kind in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Pick the encoder that matches the input modality.
    if kind == "text":
        src_dict = fields["src"].vocab
        src_feats = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, src_feats)
        encoder = build_encoder(model_opt, src_embeddings)
    elif kind == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif kind == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Target embeddings and decoder.
    tgt_dict = fields["tgt"].vocab
    tgt_feats = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(
        model_opt, tgt_dict, tgt_feats, for_encoder=False)

    if model_opt.share_embeddings:
        # Tying the lookup tables requires a vocab shared at preprocess time.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    model = onmt.models.NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Generator: CopyGenerator under copy attention, otherwise a linear
    # projection followed by log-softmax.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)
    else:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight

    if checkpoint is None:
        # Fresh start: the generator is not attached to the model yet, so
        # it is initialized explicitly, after the model.
        to_init = (model, generator)
        if model_opt.param_init != 0.0:
            for module in to_init:
                for tensor in module.parameters():
                    tensor.data.uniform_(-model_opt.param_init,
                                         model_opt.param_init)
        if model_opt.param_init_glorot:
            for module in to_init:
                for tensor in module.parameters():
                    if tensor.dim() > 1:
                        xavier_uniform_(tensor)

        # Pre-trained vectors go in last so the passes above keep them.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
    else:
        # Resume: restore both state dicts (strict loading).
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Sanity check: tied embeddings must still be the very same tensor.
    if model_opt.share_embeddings:
        assert model.encoder.embeddings.word_lut.weight \
               is model.decoder.embeddings.word_lut.weight

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(torch.device("cuda" if gpu else "cpu"))

    return model
Example #6
0
def build_end2end_model(model_opt,
                        fields,
                        gpu,
                        checkpoint=None,
                        sel_checkpoint=None,
                        s2s_gen_checkpoint=None):
    """
    Assemble an E2EModel (selector + optional encoder + decoder), attach
    its generator, and restore or initialize the parameters.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        sel_checkpoint: the model generated by selector pre-train phase; its
                    'selector' entry is loaded non-strictly when no
                    `checkpoint` is given.
        s2s_gen_checkpoint: checkpoint whose 'model' entry (non-strictly)
                    and 'generator' entry are loaded when no `checkpoint`
                    is given.
    Returns:
        the E2EModel.
    Raises:
        AssertionError: if the model type is not "text", or if
            `share_embeddings` is set while the src and tgt vocabs differ.
    """
    assert model_opt.model_type in ["text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Selector, built on its own source embeddings.
    src_dict = fields["src"].vocab
    src_feats = inputters.collect_feature_vocabs(fields, 'src')
    sel_src_embeddings = build_embeddings(model_opt, src_dict, src_feats)
    selector = build_selector(model_opt, sel_src_embeddings)

    # Encoder: only the "separate_enc_sel" layout has a dedicated one.
    if model_opt.e2e_type != "separate_enc_sel":
        # model_opt.e2e_type == "share_enc_sel"
        src_embeddings = sel_src_embeddings
        encoder = None
    else:
        tie_with_selector = model_opt.selector_share_embeddings
        if tie_with_selector:
            # the shared embeddings are in the encoder.embeddings
            # TODO: change the state name to load the embeddings in the
            # pretrained selector embeddings
            assert model_opt.load_pretrained_selector_from == ''
        src_embeddings = build_embeddings(model_opt, src_dict, src_feats)
        if tie_with_selector:
            src_embeddings.word_lut.weight = sel_src_embeddings.word_lut.weight
        encoder = build_encoder(model_opt, src_embeddings)

    # Target embeddings and decoder.
    tgt_dict = fields["tgt"].vocab
    tgt_feats = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(
        model_opt, tgt_dict, tgt_feats, for_encoder=False)

    if model_opt.share_embeddings:
        # Tying the lookup tables requires a vocab shared at preprocess time.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build E2EModel(= encoder + selector + decoder).
    model = onmt.models.E2EModel(encoder,
                                 selector,
                                 decoder,
                                 e2e_type=model_opt.e2e_type,
                                 use_gt_sel_probs=model_opt.use_gt_sel_probs)
    model.model_type = model_opt.model_type

    # Generator: CopyGenerator under copy attention, otherwise a linear
    # projection followed by log-softmax.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)
    else:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight

    if checkpoint is None:
        # Fresh start: random initialization first, then any pre-trained
        # parts are loaded on top of it.
        to_init = (model, generator)
        if model_opt.param_init != 0.0:
            for module in to_init:
                for tensor in module.parameters():
                    tensor.data.uniform_(-model_opt.param_init,
                                         model_opt.param_init)
        if model_opt.param_init_glorot:
            for module in to_init:
                for tensor in module.parameters():
                    if tensor.dim() > 1:
                        xavier_uniform_(tensor)

        if sel_checkpoint is not None:
            model.load_state_dict(sel_checkpoint['selector'], strict=False)

        if s2s_gen_checkpoint is not None:
            model.load_state_dict(s2s_gen_checkpoint['model'], strict=False)
            generator.load_state_dict(s2s_gen_checkpoint['generator'])

        # NOTE: loading of pre-trained word vectors
        # (pre_word_vecs_enc / pre_word_vecs_dec) is disabled in this builder.
    else:
        # Resume: restore both state dicts (strict loading).
        model.load_state_dict(checkpoint['end2end_model'])
        generator.load_state_dict(checkpoint['generator'])

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(torch.device("cuda" if gpu else "cpu"))

    return model
Example #7
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Construct an NMTModel for a text, image or audio source, with a target
    generator and, for the "transformerAuxLTR" encoder type, an additional
    source generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    Raises:
        AssertionError: if the model type is unsupported, or if
            `share_embeddings` is set while the src and tgt vocabs differ.
    """
    kind = model_opt.model_type
    assert kind in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    def make_gen_func():
        # Log-probability activation selected by `generator_function`.
        if model_opt.generator_function == "sparsemax":
            return onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        return nn.LogSoftmax(dim=-1)

    # Pick the encoder that matches the input modality.
    if kind == "text":
        src_dict = fields["src"].vocab
        src_feats = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, src_feats)
        # build_encoder also covers the additional TransformerEncoderLM.
        encoder = build_encoder(model_opt, src_embeddings)
    elif kind == "img":
        # Options without `image_channel_size` fall back to 3 channels.
        image_channel_size = (model_opt.image_channel_size
                              if "image_channel_size" in model_opt.__dict__
                              else 3)
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dropout,
                               image_channel_size)
    elif kind == "audio":
        encoder = AudioEncoder(model_opt.rnn_type, model_opt.enc_layers,
                               model_opt.dec_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dec_rnn_size,
                               model_opt.audio_enc_pooling, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Target embeddings and decoder.
    tgt_dict = fields["tgt"].vocab
    tgt_feats = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(
        model_opt, tgt_dict, tgt_feats, for_encoder=False)

    if model_opt.share_embeddings:
        # Tying the lookup tables requires a vocab shared at preprocess time.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    lm_aux = model_opt.encoder_type == "transformerAuxLTR"

    # Build NMTModel(= encoder + decoder); `lm_aux` is passed through to it.
    model = onmt.models.NMTModel(encoder, decoder, lm_aux=lm_aux)

    # Target generator: hidden states -> words of the tgt vocab.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab)
    else:
        tgt_activation = make_gen_func()
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
            tgt_activation)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight

    # Source generator (copy attention is not considered for it).
    if lm_aux:
        src_activation = make_gen_func()
        # source vocab does not have <s> </s>, but share_vocab might differ.
        src_generator = nn.Sequential(
            nn.Linear(model_opt.enc_rnn_size, len(fields["src"].vocab)),
            src_activation)
        if model_opt.share_decoder_embeddings:
            src_generator[0].weight = src_embeddings.word_lut.weight

    if checkpoint is None:
        # Fresh start: generators are not attached to the model yet, so they
        # are initialized explicitly, in this order.
        to_init = [model, generator]
        if lm_aux:
            to_init.append(src_generator)

        if model_opt.param_init != 0.0:
            for module in to_init:
                for tensor in module.parameters():
                    tensor.data.uniform_(-model_opt.param_init,
                                         model_opt.param_init)
        if model_opt.param_init_glorot:
            for module in to_init:
                for tensor in module.parameters():
                    if tensor.dim() > 1:
                        xavier_uniform_(tensor)

        # Pre-trained vectors go in last so the passes above keep them.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
    else:
        # Resume: restore model and target generator non-strictly.
        # NOTE: `src_generator` is not restored from the checkpoint here.
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)

    # Add generator(s) to model (this registers them as parameters of model).
    model.generator = generator
    if lm_aux:
        model.src_generator = src_generator
    model.to(torch.device("cuda" if gpu else "cpu"))

    return model
Example #8
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Assemble the NMTModel ([session encoder] + encoder + decoder) and attach
    a CopyGenerator to it.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel, with the generator registered as `model.generator`
        and moved to the selected device.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Source side: word vocabulary, feature vocabularies, then the encoder.
    src_vocab = fields["src"].vocab
    src_feature_vocabs = inputters.collect_feature_vocabs(fields, 'src')
    src_embeddings = build_embeddings(model_opt, src_vocab,
                                      src_feature_vocabs)
    encoder = build_encoder(model_opt, src_embeddings)

    # Target side embeddings (the decoder itself is built after the
    # optional weight sharing below).
    tgt_vocab = fields["tgt"].vocab
    tgt_feature_vocabs = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(
        model_opt, tgt_vocab, tgt_feature_vocabs, for_encoder=False)

    if model_opt.share_embeddings:
        # Sharing one embedding matrix only makes sense when both sides use
        # the same vocabulary (`-share_vocab` at preprocess time).
        if src_vocab != tgt_vocab:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    device = torch.device("cuda" if gpu else "cpu")

    # The session encoder is only present for the 'session' experiment; its
    # embeddings are built in the positional order SessionEncoder expects.
    session_encoder = None
    if model_opt.experiment == 'session':
        session_field_names = (
            "src_item_sku",
            "src_user_log",
            "src_operator",
            "src_site_cy",
            "src_site_pro",
            "src_site_ct",
        )
        session_embeddings = [
            build_embeddings(model_opt, fields[field_name].vocab, [],
                             for_user=True)
            for field_name in session_field_names
        ]
        session_encoder = SessionEncoder(*session_embeddings)

    model = onmt.models.NMTModel(session_encoder, encoder, decoder)
    model.model_type = model_opt.model_type

    # This variant always uses a copy generator.
    generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab,
                              model_opt.session_weight,
                              model_opt.explanation_weight)

    if checkpoint is None:
        # Fresh model: initialise the model first, then the generator.
        if model_opt.param_init != 0.0:
            for module in (model, generator):
                for param in module.parameters():
                    param.data.uniform_(-model_opt.param_init,
                                        model_opt.param_init)
        if model_opt.param_init_glorot:
            for module in (model, generator):
                for param in module.parameters():
                    if param.dim() > 1:
                        xavier_uniform_(param)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
    else:
        # Resume: restore both state dicts from the checkpoint.
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Attaching the generator registers its parameters on the model.
    model.generator = generator
    model.to(device)

    return model
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the NMTModel (encoder + decoder) and attach a linear generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training. Its 'model' entry is
                    rewritten in place with backward-compatible key names.
    Returns:
        the NMTModel, with the generator registered as `model.generator`
        and moved to the selected device.
    Raises:
        AssertionError: if the model type is unsupported, if a text model
            has different encoder and decoder rnn sizes, if
            `share_embeddings` is requested for a non-text model, or if
            `share_embeddings` is requested with differing src/tgt vocabs.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size

    # Validate after the override above: inside that branch both sizes have
    # just been made equal, so the check could never trigger there.
    if model_opt.model_type == 'text' and \
            model_opt.enc_rnn_size != model_opt.dec_rnn_size:
        raise AssertionError("""We do not support different encoder and
                                     decoder rnn sizes for translation now.""")

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        # Options saved without this attribute fall back to 3 channels.
        if "image_channel_size" not in model_opt.__dict__:
            image_channel_size = 3
        else:
            image_channel_size = model_opt.image_channel_size

        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.rnn_type, model_opt.enc_layers,
                               model_opt.dec_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dec_rnn_size,
                               model_opt.audio_enc_pooling, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # Only the text branch creates source embeddings; fail with a clear
        # message instead of an UnboundLocalError on src_dict below.
        if model_opt.model_type != "text":
            raise AssertionError(
                '--share_embeddings requires --model_type text.')
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # The generator function only matters at translation time, so it is not
    # necessary to create it here anymore. At translation time, the model's
    # model_opt will still have a value for generator_function or loss_alpha.
    # This is sufficient to figure out what function to use at translation
    # time.
    generator = nn.Sequential(
        nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)))
    if model_opt.share_decoder_embeddings:
        generator[0].weight = decoder.embeddings.word_lut.weight

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using customed layernorm
        def fix_key(s):
            """Map legacy layer-norm names (a_2/b_2) to weight/bias."""
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = \
            {fix_key(k): v for (k, v) in checkpoint['model'].items()}
        # end of patch for backward compatibility

        # strict=False: missing or unexpected keys are not treated as errors.
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        # Initialise the model first, then the generator, for each scheme.
        if model_opt.param_init != 0.0:
            for module in (model, generator):
                for p in module.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
        if model_opt.param_init_glorot:
            for module in (model, generator):
                for p in module.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example #10
0
def build_base_model(model_opt,
                     fields,
                     gpu,
                     length_model,
                     length_penalty_a,
                     length_penalty_b,
                     checkpoint=None):
    """
    Build the NMTModel (encoder + decoder) and its generator, optionally
    appending a length-dependent adjustment stage to the generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        length_model: name of the length model. When it is 'oracle',
                      'fixed_ratio' or 'lstm' and copy attention is off, a
                      `tune_out_prob` stage is appended to the generator.
        length_penalty_a: coefficient `a` in the adjustment
                          `-a * (value / scale)**2 + b`.
        length_penalty_b: offset `b` in the same adjustment.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training. Its 'model' entry is
                    rewritten in place with backward-compatible key names.
    Returns:
        the NMTModel, with the generator registered as `model.generator`
        and moved to the selected device.
    Raises:
        AssertionError: if the model type is unsupported, if a text model
            has different encoder and decoder rnn sizes, if
            `share_embeddings` is requested for a non-text model, or if
            `share_embeddings` is requested with differing src/tgt vocabs.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size

    # Validate after the override above: inside that branch both sizes have
    # just been made equal, so the check could never trigger there.
    if model_opt.model_type == 'text' and \
            model_opt.enc_rnn_size != model_opt.dec_rnn_size:
        raise AssertionError("""We do not support different encoder and
                                     decoder rnn sizes for translation now.""")

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        # Options saved without this attribute fall back to 3 channels.
        if "image_channel_size" not in model_opt.__dict__:
            image_channel_size = 3
        else:
            image_channel_size = model_opt.image_channel_size

        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.rnn_type, model_opt.enc_layers,
                               model_opt.dec_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dec_rnn_size,
                               model_opt.audio_enc_pooling, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # Only the text branch creates source embeddings; fail with a clear
        # message instead of an UnboundLocalError on src_dict below.
        if model_opt.model_type != "text":
            raise AssertionError(
                '--share_embeddings requires --model_type text.')
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)

        class tune_out_prob(nn.Module):
            """
            Parameter-free generator stage that adds a length-dependent
            term to the scores at column `eos_ind`.

            The attributes below start as None/False and are read by
            `forward`; they are expected to be assigned from outside before
            the stage is called (the assigning code is not in this
            function). Presumably `t_lens` holds per-sentence target
            lengths and `eos_ind` the end-of-sentence index - TODO confirm
            against the caller.
            """

            def __init__(self):
                super(tune_out_prob, self).__init__()
                self.t_lens = None
                self.eos_ind = None
                self.batch_max_len = None
                self.word_index = None
                self.tgt_vocab_size = None
                self.validation = False

            def length_model_loss(self, scale, value, a, b):
                """Return the adjustment `-a * (value / scale)**2 + b`."""
                return -a * (value / scale)**2 + b

            def forward(self, x):
                """
                Return a copy of `x` with the `eos_ind` column adjusted.

                In training (or when `validation` is set) `x` is viewed as
                [batch_max_len, -1, tgt_vocab_size] and every step gets its
                own adjustment. Otherwise a 3-D `x` is returned unchanged
                and a 2-D `x` is adjusted for the single step `word_index`.
                """
                y = x.clone()
                if self.training or self.validation:  # training phase
                    y = y.view(self.batch_max_len, -1, self.tgt_vocab_size)
                    # Depends only on the lengths, so compute it once.
                    scale = (self.t_lens.float()).sqrt() / 2.0
                    for wi in range(self.batch_max_len):
                        # Remaining distance to t_lens at step wi; negative
                        # distances (past t_lens) are damped by 0.05.
                        delta_p = (self.t_lens - wi - 1).float()
                        past_end = delta_p < 0
                        delta_p[past_end] = 0.05 * delta_p[past_end]
                        penalties = self.length_model_loss(
                            scale, delta_p, length_penalty_a, length_penalty_b)
                        y[wi, :, self.eos_ind] += penalties
                    y = y.view(-1, self.tgt_vocab_size)
                elif len(x.size()) != 3:
                    # translation phase, x of shape
                    # [(batch_size x beam_size), vocab] is only for one
                    # step. (A 3-D x of shape [tgt_len, batch_size, vocab]
                    # is a full sentence and is passed through untouched.)
                    beam_size = x.size(0) // self.t_lens.numel()
                    wi = self.word_index
                    delta_p = (self.t_lens - wi - 2).float()
                    past_end = delta_p < 0
                    delta_p[past_end] = 0.005 * delta_p[past_end]
                    # Repeat each sentence's value once per beam so it lines
                    # up with the rows of x.
                    delta_p = delta_p.unsqueeze(1).expand(
                        self.t_lens.numel(), beam_size).flatten()
                    scale = (self.t_lens.float()).sqrt() / 2.0
                    scale = scale.unsqueeze(1).expand(
                        self.t_lens.numel(), beam_size).flatten()
                    penalties = self.length_model_loss(
                        scale, delta_p, length_penalty_a, length_penalty_b)
                    y[:, self.eos_ind] += penalties
                return y

        # The length stage is appended only for the known length models.
        generator_layers = [
            nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
            gen_func,
        ]
        if length_model in ('oracle', 'fixed_ratio', 'lstm'):
            generator_layers.append(tune_out_prob())
        generator = nn.Sequential(*generator_layers)

        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using customed layernorm
        def fix_key(s):
            """Map legacy layer-norm names (a_2/b_2) to weight/bias."""
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = \
            {fix_key(k): v for (k, v) in checkpoint['model'].items()}
        # end of patch for backward compatibility

        # strict=False: missing or unexpected keys are not treated as errors.
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        # Initialise the model first, then the generator, for each scheme.
        if model_opt.param_init != 0.0:
            for module in (model, generator):
                for p in module.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
        if model_opt.param_init_glorot:
            for module in (model, generator):
                for p in module.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example #11
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Assemble the NMTModel from a question encoder, an answer encoder and a
    decoder, then attach the generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel, with the generator registered as `model.generator`
        and moved to the selected device.
    """
    assert model_opt.model_type == "text", ("Unsupported model type %s" %
                                            (model_opt.model_type))

    logger.info("build_base_model")
    if model_opt.model_type == "text":
        # Question (source) encoder.
        # `vocab` is a torchtext.vocab.Vocab object:
        # dict_keys(['vectors', 'stoi', 'freqs', 'itos'])
        src_vocab = fields["src"].vocab
        src_feature_vocabs = inputters.collect_feature_vocabs(
            fields, 'src')  # list: []
        src_embeddings = build_embeddings(model_opt, src_vocab,
                                          src_feature_vocabs)
        # Example of the resulting module:
        #   Embeddings(
        #     (make_embedding): Sequential(
        #       (emb_luts): Elementwise(
        #         (0): Embedding(24997, 500, padding_idx=1)
        #       )
        #     )
        #   )
        logger.info("bulding question encoder")
        encoder = build_encoder(model_opt, src_embeddings)
        logger.info(encoder)

        # Answer encoder: its own vocabulary, but the same feature
        # vocabularies that were collected for the source side.
        ans_vocab = fields["ans"].vocab
        ans_embeddings = build_embeddings(model_opt, ans_vocab,
                                          src_feature_vocabs)
        logger.info("building answer encoder")
        encoder_ans = build_encoder(model_opt, ans_embeddings)
        logger.info(encoder_ans)

    # Target side embeddings (the decoder itself is built after the
    # optional weight sharing below).
    tgt_vocab = fields["tgt"].vocab
    tgt_feature_vocabs = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(
        model_opt, tgt_vocab, tgt_feature_vocabs, for_encoder=False)

    if model_opt.share_embeddings:
        # Sharing one embedding matrix only makes sense when both sides use
        # the same vocabulary (`-share_vocab` at preprocess time).
        if src_vocab != tgt_vocab:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # NMTModel(= question encoder + answer encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = NMTModel(encoder, encoder_ans, decoder)
    model.model_type = model_opt.model_type

    # Generator: copy generator when copy attention is on, otherwise a
    # linear projection followed by log-softmax.
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)
    else:
        # The projection reads an input of width 2 * rnn_size.
        projection = nn.Linear(model_opt.rnn_size * 2,
                               len(fields["tgt"].vocab))
        generator = nn.Sequential(projection, nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            projection.weight = decoder.embeddings.word_lut.weight

    if checkpoint is None:
        # Fresh model: initialise the model first, then the generator.
        if model_opt.param_init != 0.0:
            for module in (model, generator):
                for param in module.parameters():
                    param.data.uniform_(-model_opt.param_init,
                                        model_opt.param_init)
        if model_opt.param_init_glorot:
            for module in (model, generator):
                for param in module.parameters():
                    if param.dim() > 1:
                        xavier_uniform_(param)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
    else:
        # Resume: restore both state dicts from the checkpoint.
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Attaching the generator registers its parameters on the model.
    model.generator = generator
    model.to(device)

    return model
Example #12
0
# Message printed once the model selected by ``model_opt.wals_model`` has been
# assembled.  The keys are the complete set of supported WALS variants; each is
# spelled ``<Family>_<Scope>`` where ``<Scope>`` is ``Target`` or ``Both``.
_WALS_MODEL_MESSAGES = {
    'EncInitHidden_Target':
        "Model created: uses WALS features from the target language to initialize encoder's hidden state.",
    'EncInitHidden_Both':
        "Model created: uses WALS features from the source and target languages to initialize encoder's hidden state.",
    'DecInitHidden_Target':
        "Model created: adds WALS features from the target language to the encoder's output to initialize decoder's hidden state.",
    'DecInitHidden_Both':
        "Model created: adds WALS features from the source and target languages to the encoder's output to initialize decoder's hidden state.",
    'WalstoSource_Target':
        "Model created: concatenates WALS features from the target language to source words embeddings.",
    'WalstoSource_Both':
        "Model created: concatenates WALS features from the source and target languages to source words embeddings.",
    'WalstoTarget_Target':
        "Model created: concatenates WALS features from the target language to target words embeddings.",
    'WalstoTarget_Both':
        "Model created: concatenates WALS features from the source and target languages to target words embeddings.",
    'WalsDoublyAttentive_Target':
        "Model created: the WALS features from the target language are incorporated as an additional attention mechanism.",
    'WalsDoublyAttentive_Both':
        "Model created: the WALS features from the source and target languages are incorporated as an additional attention mechanism.",
    'WalstoDecHidden_Target':
        "Model created: concatenates WALS features from the target language to decoder hidden state.",
    'WalstoDecHidden_Both':
        "Model created: concatenates WALS features from the source and target languages to decoder hidden state.",
}


def _build_wals_feature_modules(model_opt, gpu, FeatureTensors, FeaturesList,
                                FeatureNames, FeatureTypes, FTInfos,
                                FeatureTypesNames):
    """Build one embedding per WALS feature and one MLP per feature type.

    Returns:
        A pair ``(embeddings, mlps)``: an ``nn.ModuleDict`` keyed by feature
        name, and a plain dict keyed by feature-type name.
    """
    embeddings = {}
    mlps = {}
    for FeatureType in FeatureTypes:
        # Each entry is indexed as [0] = feature-type name, [1] = its features.
        type_name = FeatureType[0]
        for Feature in FeatureType[1]:
            embedding = build_feature_embeddings(gpu, FeatureTensors,
                                                 FeaturesList, FeatureNames,
                                                 Feature)
            # NOTE(review): the original implementation also published every
            # module as a module-level global; that side effect is preserved
            # for backward compatibility but looks unnecessary - confirm
            # nothing reads these names and drop the globals() writes.
            globals()['embedding_%s' % Feature] = embedding
            embeddings[Feature] = embedding
        mlp = build_mlp_feature_type(model_opt, FTInfos, FeatureTypesNames,
                                     type_name)
        globals()['mlp_%s' % type_name] = mlp
        mlps[type_name] = mlp
    return nn.ModuleDict(embeddings), mlps


def _assemble_wals_model(model_opt, encoder, decoder, embedding_features,
                         mlp_by_type, FeatureValues, FeatureTypes,
                         SimulationLanguages, FTInfos):
    """Instantiate the model class selected by ``model_opt.wals_model``.

    The caller must already have checked that ``model_opt.wals_model`` is a
    key of ``_WALS_MODEL_MESSAGES``.
    """
    wals_model = model_opt.wals_model
    family, scope = wals_model.split('_')
    use_both = scope == 'Both'

    if family == 'WalsDoublyAttentive':
        # Only this family consumes the per-feature-type MLPs.
        mlp_feature_types = nn.ModuleDict(mlp_by_type)
        if use_both:
            attention_mlp = build_doublyattentive_both(model_opt)
        else:
            attention_mlp = build_doublyattentive_target(model_opt)
        wals_modules = (attention_mlp, mlp_feature_types)
    elif family in ('EncInitHidden', 'DecInitHidden'):
        if use_both:
            projection = build_mlp2rnnhiddensize_both(model_opt, FTInfos)
        else:
            projection = build_mlp2rnnhiddensize_target(model_opt, FTInfos)
        wals_modules = (projection,)
    else:
        if use_both:
            projection = build_mlp2walshiddensize_both(model_opt, FTInfos)
        else:
            projection = build_mlp2walshiddensize_target(model_opt, FTInfos)
        wals_modules = (projection,)
    print('Embeddings for WALS features and MLP models are built!')

    if family == 'EncInitHidden':
        model_class = EncoderInitialization
    elif family == 'DecInitHidden':
        model_class = DecoderInitialization
    elif family == 'WalstoSource':
        model_class = CombineWalsSourceWords
    elif family == 'WalstoTarget':
        model_class = CombineWalsTargetWords
    elif family == 'WalsDoublyAttentive':
        model_class = WalsDoublyAttention
    else:
        model_class = WalstoDecHidden

    model = model_class(wals_model, encoder, decoder, *wals_modules,
                        embedding_features, FeatureValues, FeatureTypes,
                        SimulationLanguages, model_opt)
    print(_WALS_MODEL_MESSAGES[wals_model])
    return model


def build_base_model(model_opt,
                     fields,
                     gpu,
                     FeatureValues,
                     FeatureTensors,
                     FeatureTypes,
                     FeaturesList,
                     FeatureNames,
                     FTInfos,
                     FeatureTypesNames,
                     SimulationLanguages,
                     checkpoint=None):
    """
    Build an encoder/decoder model augmented with WALS feature modules.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        FeatureValues, SimulationLanguages: WALS data passed unchanged to the
            selected model class.
        FeatureTensors, FeaturesList, FeatureNames: WALS data passed to
            `build_feature_embeddings` for every feature.
        FeatureTypes: sequence of entries indexed as ``[0]`` = feature-type
            name and ``[1]`` = list of the features of that type.
        FTInfos, FeatureTypesNames: WALS data passed to the MLP builders.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the model selected by ``model_opt.wals_model``, with its generator
        attached and moved to the requested device.
    Raises:
        Exception: if ``model_opt.wals_model`` is not a supported variant.
        AssertionError: if the model type is unsupported, or if
            ``share_embeddings`` is set for a non-text source or for
            differing source/target vocabularies.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Reject unknown WALS variants before building anything, so a typo in the
    # option does not cost the construction of every embedding and MLP first.
    if model_opt.wals_model not in _WALS_MODEL_MESSAGES:
        raise Exception("WALS model type not yet implemented: %s" %
                        (model_opt.wals_model))

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        # Options saved without this attribute fall back to 3 channels.
        if "image_channel_size" in model_opt.__dict__:
            image_channel_size = model_opt.image_channel_size
        else:
            image_channel_size = 3

        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # Only the text encoder creates source embeddings to share.
        if model_opt.model_type != "text":
            raise AssertionError('share_embeddings is only supported for '
                                 'the "text" model type.')
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # WALS modules.
    print(
        'Building embeddings for each WALS feature and MLP models for each feature type...'
    )
    EmbeddingFeatures, mlp_by_type = _build_wals_feature_modules(
        model_opt, gpu, FeatureTensors, FeaturesList, FeatureNames,
        FeatureTypes, FTInfos, FeatureTypesNames)

    # Build the WALS-aware model (= encoder + decoder + WALS modules).
    device = torch.device("cuda" if gpu else "cpu")
    model = _assemble_wals_model(model_opt, encoder, decoder,
                                 EmbeddingFeatures, mlp_by_type,
                                 FeatureValues, FeatureTypes,
                                 SimulationLanguages, FTInfos)

    model.model_type = model_opt.model_type

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)), gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example #13
0
def _load_state_over_defaults(module, saved_state):
    """Overlay ``saved_state`` on ``module``'s own state dict and load it.

    Keys absent from ``saved_state`` keep the values the module was built
    with, so a checkpoint covering only part of the module still loads under
    strict key checking.
    """
    merged_state = module.state_dict()
    merged_state.update(saved_state)
    module.load_state_dict(merged_state)


def build_lm_bias_base_model(model_opt,
                             fields,
                             gpu,
                             checkpoint=None,
                             lm_out_checkpoint=None,
                             lm_in_checkpoint=None):
    """
    Build an NMT model carrying two frozen language models.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training. Required despite the
                    ``None`` default.
        lm_out_checkpoint: checkpoint whose ``'model'`` and ``'generator'``
                    entries are loaded into the language model attached as
                    ``model.lm_out``. Required.
        lm_in_checkpoint: same as above for ``model.lm_in``. Required.
    Returns:
        the NMTModel, with ``generator``, ``lm_out`` and ``lm_in`` attached;
        every parameter of the two language models has ``requires_grad``
        set to ``False``.
    Raises:
        AssertionError: if the model type is not "text", if any of the three
            checkpoints is missing, or if ``share_embeddings`` is set with
            differing source/target vocabularies.
    """
    # Explicit raises rather than `assert`, so the checks survive `python -O`.
    if model_opt.model_type not in ["text"]:
        raise AssertionError("Unsupported model type %s" %
                             (model_opt.model_type))

    # Validate the checkpoints before building six networks; all three are
    # required even though the signature defaults them to None.
    if checkpoint is None:
        raise AssertionError("checkpoint is required to build the model")
    for arg_name, lm_checkpoint in (("lm_out_checkpoint", lm_out_checkpoint),
                                    ("lm_in_checkpoint", lm_in_checkpoint)):
        if lm_checkpoint is None or lm_checkpoint['model'] is None:
            raise AssertionError("%s with a 'model' entry is required" %
                                 arg_name)

    # Build encoders (construction order is kept as in the original).
    src_dict = fields["src"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
    src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
    lm_out_src_embeddings = build_embeddings(model_opt, src_dict,
                                             feature_dicts)
    lm_in_src_embeddings = build_embeddings(model_opt, src_dict,
                                            feature_dicts)
    encoder = build_encoder(model_opt, src_embeddings)
    lm_out_encoder = build_encoder(model_opt, lm_out_src_embeddings, "lm")
    lm_in_encoder = build_encoder(model_opt, lm_in_src_embeddings, "lm")

    # Build decoders.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt,
                                      tgt_dict,
                                      feature_dicts,
                                      for_encoder=False)
    lm_out_tgt_embeddings = build_embeddings(model_opt,
                                             tgt_dict,
                                             feature_dicts,
                                             for_encoder=False)
    lm_in_tgt_embeddings = build_embeddings(model_opt,
                                            tgt_dict,
                                            feature_dicts,
                                            for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    # Only the translation model shares; the language models keep their own.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)
    lm_out_decoder = build_decoder(model_opt, lm_out_tgt_embeddings, "lm")
    lm_in_decoder = build_decoder(model_opt, lm_in_tgt_embeddings, "lm")

    # Build NMTModel(= encoder + decoder) and the two language models.
    device = torch.device("cuda" if gpu else "cpu")

    model = onmt.models.NMTModel(encoder, decoder)
    lm_out_model = onmt.models.LMModel(lm_out_encoder, lm_out_decoder)
    lm_in_model = onmt.models.LMModel(lm_in_encoder, lm_in_decoder)

    model.model_type = model_opt.model_type
    lm_out_model.model_type = model_opt.model_type
    lm_in_model.model_type = model_opt.model_type

    # Build Generators.
    tgt_vocab_size = len(fields["tgt"].vocab)
    gen_func = nn.LogSoftmax(dim=-1)
    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, tgt_vocab_size), gen_func)
    if model_opt.share_decoder_embeddings:
        generator[0].weight = decoder.embeddings.word_lut.weight
    lm_out_generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, tgt_vocab_size), gen_func)
    lm_in_generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, tgt_vocab_size), gen_func)

    # Load the model states from the checkpoints. This happens before the
    # generators and language models are attached to `model`, so each state
    # dict only has to cover its own encoder/decoder.
    _load_state_over_defaults(model, checkpoint['model'])
    generator.load_state_dict(checkpoint['generator'])

    _load_state_over_defaults(lm_out_model, lm_out_checkpoint['model'])
    lm_out_generator.load_state_dict(lm_out_checkpoint['generator'])

    _load_state_over_defaults(lm_in_model, lm_in_checkpoint['model'])
    lm_in_generator.load_state_dict(lm_in_checkpoint['generator'])

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.lm_out = lm_out_model
    model.lm_in = lm_in_model
    model.lm_out.generator = lm_out_generator
    model.lm_in.generator = lm_in_generator

    # Freeze both language models, generators included.
    for frozen_lm in (model.lm_out, model.lm_in):
        for param in frozen_lm.parameters():
            param.requires_grad = False

    model.to(device)
    return model
Example #14
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build an NMTModel or VecModel for text, image, audio or vector sources.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the model (a `VecModel` when ``model_opt.decoder_type`` starts with
        "vecdif", otherwise an `NMTModel`) with ``generator`` attached;
        ``generator`` is ``None`` for a vector model whose
        ``generator_function`` is "none".
    Raises:
        AssertionError: if the model type is unsupported, if
            ``share_embeddings`` is used with a non-text model or differing
            vocabularies, or if ``share_decoder_embeddings`` is requested
            while no generator is built.
    """
    assert model_opt.model_type in ["text", "img", "audio", "vector"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    use_src_directly_for_dec = False
    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        # Options saved without this attribute fall back to 3 channels.
        if "image_channel_size" in model_opt.__dict__:
            image_channel_size = model_opt.image_channel_size
        else:
            image_channel_size = 3

        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.rnn_type, model_opt.enc_layers,
                               model_opt.dec_layers, model_opt.brnn,
                               model_opt.enc_rnn_size, model_opt.dec_rnn_size,
                               model_opt.audio_enc_pooling, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)
    elif model_opt.model_type == "vector":
        use_src_directly_for_dec = True
        # A source field without a vocabulary borrows the target one.
        if not hasattr(fields["src"], 'vocab'):
            fields["src"].vocab = fields["tgt"].vocab
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        # The decoder embeddings are built from the source vocabulary here
        # and dropped again whenever an encoder is created below.
        tgt_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        if model_opt.encoder_type in ("rnn", "brnn"):
            encoder = RNNEncoder(model_opt.rnn_type, model_opt.brnn,
                                 model_opt.enc_layers, model_opt.enc_rnn_size,
                                 model_opt.dropout, None, model_opt.bridge)
            tgt_embeddings = None
        # NOTE(review): this selects the CNN *encoder* from `decoder_type`;
        # confirm that is intended rather than `encoder_type`.
        elif model_opt.decoder_type == "cnn":
            use_src_directly_for_dec = False
            encoder = CNNEncoder(model_opt.enc_layers, model_opt.enc_rnn_size,
                                 model_opt.cnn_kernel_width, model_opt.dropout,
                                 None)
            tgt_embeddings = None
        else:
            encoder = None

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    if model_opt.model_type != "vector":
        tgt_embeddings = build_embeddings(model_opt,
                                          tgt_dict,
                                          feature_dicts,
                                          for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # Source embeddings only exist for the text encoder; fail with a
        # clear message instead of an undefined-name error.
        if model_opt.model_type != "text":
            raise AssertionError('share_embeddings is only supported for '
                                 'the "text" model type.')
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    if model_opt.decoder_type.startswith("vecdif"):
        model_class = onmt.models.VecModel
    else:
        model_class = onmt.models.NMTModel
    model = model_class(encoder,
                        decoder,
                        use_src_directly_for_dec=use_src_directly_for_dec)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        elif model_opt.generator_function == "sigmoid":
            gen_func = nn.Sigmoid()
        else:
            gen_func = nn.LogSoftmax(dim=-1)

        if model_opt.model_type != "vector":
            generator = nn.Sequential(
                nn.Linear(model_opt.dec_rnn_size, len(fields["tgt"].vocab)),
                gen_func)
        elif model_opt.generator_function == "none":
            # No generator is built for this configuration.
            generator = None
        else:
            generator = nn.Sequential(
                nn.Linear(model_opt.dec_rnn_size, model_opt.final_vec_size),
                gen_func)

        if model_opt.share_decoder_embeddings:
            # Without a generator there is no weight to tie; previously this
            # failed with "'NoneType' object is not subscriptable".
            if generator is None:
                raise AssertionError(
                    'share_decoder_embeddings cannot be used when '
                    'generator_function is "none" for a vector model.')
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # strict=False: missing and unexpected keys are tolerated.
        model.load_state_dict(checkpoint['model'], strict=False)
        if generator is not None:
            generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            if generator is not None:
                for p in generator.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            if generator is not None:
                for p in generator.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        # Pretrained word vectors are never loaded for vector models.
        if hasattr(model.encoder,
                   'embeddings') and model_opt.model_type != "vector":
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder,
                   'embeddings') and model_opt.model_type != "vector":
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
Example #15
0
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMTModel (encoder + decoder) with its generator attached.

    Args:
        model_opt: the option loaded from checkpoint. When ``rnn_size`` is
            not -1, ``enc_rnn_size`` and ``dec_rnn_size`` are overwritten
            in place with it (backward compatibility).
        fields: `Field` objects for the model; ``fields["tgt"]`` is always
            read, ``fields["src"]`` only for the "text" model type.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training. When given, its 'model'
                    and 'generator' entries are loaded non-strictly instead
                    of initializing parameters.
    Returns:
        the NMTModel, with ``model.generator`` set, moved to the device.
    Raises:
        AssertionError: on an unsupported model type, on different
            encoder/decoder rnn sizes for a text model, or when
            ``share_embeddings`` is requested without a shared src/tgt
            vocabulary.
        SystemExit: when src and tgt vocabularies are equal but
            ``share_embeddings`` is off (the function calls ``exit(0)``).
    """
    # Explicit raise instead of `assert`, so the check survives `python -O`.
    if model_opt.model_type not in ("text", "img", "audio"):
        raise AssertionError(
            "Unsupported model type %s" % (model_opt.model_type))

    # for backward compatibility
    if model_opt.rnn_size != -1:
        model_opt.enc_rnn_size = model_opt.rnn_size
        model_opt.dec_rnn_size = model_opt.rnn_size

    # This check must live outside the branch above: inside it both sizes
    # have just been set to the same value and the check could never fire.
    if model_opt.model_type == 'text' and \
            model_opt.enc_rnn_size != model_opt.dec_rnn_size:
        raise AssertionError("We do not support different encoder and "
                             "decoder rnn sizes for translation now.")

    # Only the text encoder has a source vocabulary / source embeddings.
    # Bind both up front so the embedding-sharing logic below never reads an
    # unbound local for "img" / "audio" models.
    src_dict = None
    src_embeddings = None

    # Build encoder.
    logger.info('Building encoder......')
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        # Options that do not carry the attribute fall back to 3 channels.
        image_channel_size = getattr(model_opt, "image_channel_size", 3)

        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.enc_rnn_size,
                               model_opt.dropout,
                               image_channel_size)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.rnn_type,
                               model_opt.enc_layers,
                               model_opt.dec_layers,
                               model_opt.brnn,
                               model_opt.enc_rnn_size,
                               model_opt.dec_rnn_size,
                               model_opt.audio_enc_pooling,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Build decoder.
    logger.info('Building decoder......')
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt, tgt_dict,
                                      feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        if src_embeddings is None:
            # Non-text encoders have no word embedding matrix to share.
            raise AssertionError('share_embeddings requires source word '
                                 'embeddings, which model type %s does not '
                                 'have!' % (model_opt.model_type))
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        logger.info('** Sharing word embedding matrix between src/tgt')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight
    elif src_dict is not None and src_dict == tgt_dict:
        # NOTE(review): a tied vocabulary without shared embeddings aborts
        # the whole process with exit status 0; kept as in the original.
        logger.info('WARNING: NOT SHARING WORD EMBEDDINGS FOR TIED VOCAB???')
        exit(0)

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    logger.info('Building NMTModel......')
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    logger.info('Building generator......')

    # (standard generator)
    #
    # Given final hidden state (after attention) at t-th decoding step, return
    #
    #     s_t = log(softmax(W h_t + b))
    #
    # where W is optionally tied to the decoder word embedding matrix.

    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size, len(tgt_dict)),
            gen_func
        )
        if model_opt.share_decoder_embeddings:
            logger.info('** Sharing generator softmax with tgt word embedding')
            generator[0].weight = decoder.embeddings.word_lut.weight
        else:
            logger.info('WARNING: NOT SHARING GENERATOR SOFTMAX WITH TGT WORD '
                        'EMBEDDING MATRIX - IS THERE A GOOD REASON?')
    else:
        generator = CopyGenerator(model_opt.dec_rnn_size, tgt_dict)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        logger.info('Loade model states from checkpoint......')
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        logger.info('Initializing parameters......')
        if not model_opt.param_init_glorot:
            logger.info('WARNING: NOT USING XAVIER INITIALIZATION? WILL JUST '
                        'USE UNIF(+- %.2f)' % (model_opt.param_init))

        # Model first, then generator, within each init scheme: the order
        # determines how the random number generator is consumed.
        modules = (model, generator)
        if model_opt.param_init != 0.0:
            for module in modules:
                for p in module.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
        if model_opt.param_init_glorot:
            # Runs after the uniform init, so it overrides it for every
            # parameter with more than one dimension.
            for module in modules:
                for p in module.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            if model_opt.pre_word_vecs_enc:
                logger.info('** Using pretrained encoder word embeddings')
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            if model_opt.pre_word_vecs_dec:
                logger.info('** Using pretrained decoder word embeddings')
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model