Example #1
def init_model(word2id, opt):
    model = Seq2SeqLSTMAttention(
        emb_dim=opt.word_vec_size,
        vocab_size=opt.vocab_size,
        src_hidden_dim=opt.rnn_size,
        trg_hidden_dim=opt.rnn_size,
        ctx_hidden_dim=opt.rnn_size,
        attention_mode='dot',
        batch_size=opt.batch_size,
        bidirectional=opt.bidirectional,
        pad_token_src=word2id[pykp.io.PAD_WORD],
        pad_token_trg=word2id[pykp.io.PAD_WORD],
        nlayers_src=opt.enc_layers,
        nlayers_trg=opt.dec_layers,
        dropout=opt.dropout,
        teacher_forcing_ratio=opt.teacher_forcing_ratio,
        scheduled_sampling=opt.scheduled_sampling,
        scheduled_sampling_batches=opt.scheduled_sampling_batches)

    logging.info(
        '======================  Model Parameters  =========================')
    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        if torch.cuda.is_available():
            model.load_state_dict(torch.load(open(opt.train_from, 'rb')))
        else:
            model.load_state_dict(
                torch.load(open(opt.train_from, 'rb'),
                           map_location=lambda storage, loc: storage))
    utils.tally_parameters(model)

    return model
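
Example #1 loads the checkpoint straight onto the GPU when CUDA is available and otherwise remaps every tensor to CPU via map_location. Below is a minimal, self-contained sketch of that loading pattern in plain PyTorch; the tiny model and the checkpoint path are placeholders, not part of the original repo:

import torch
import torch.nn as nn

# Placeholder standing in for Seq2SeqLSTMAttention.
model = nn.Linear(16, 4)

checkpoint_path = 'checkpoint.pt'  # hypothetical path
torch.save(model.state_dict(), checkpoint_path)

if torch.cuda.is_available():
    # Tensors are restored onto the device they were saved from.
    state_dict = torch.load(checkpoint_path)
else:
    # Remap every storage to CPU so GPU-saved checkpoints still load.
    state_dict = torch.load(checkpoint_path,
                            map_location=lambda storage, loc: storage)

model.load_state_dict(state_dict)
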
Example #2
def init_model(opt):
    logging.info(
        '======================  Model Parameters  =========================')

    if opt.cascading_model:
        model = Seq2SeqLSTMAttentionCascading(opt)
    else:
        if opt.copy_attention:
            logging.info('Train a Seq2Seq model with Copy Mechanism')
        else:
            logging.info('Train a normal Seq2Seq model')

        if opt.encoder_type == 'bert':
            model = Seq2SeqBERT(opt)
        elif opt.encoder_type == 'bert_low':
            model = Seq2SeqBERTLow(opt)
        elif opt.encoder_type == 'transformer':
            model = Seq2SeqTransformer(opt)
        else:
            model = Seq2SeqLSTMAttention(opt)

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        # train_from_model_dir = opt.train_from[:opt.train_from.rfind('model/') + 6]
        # load the saved meta-model and override the current one
        # model = torch.load(
        #     open(os.path.join(opt.model_path, opt.exp + '.initial.model'), 'rb')
        # )

        if torch.cuda.is_available() and opt.useGpu:
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        # compatibility fix: checkpoints saved from nn.DataParallel models
        # prefix their keys with 'module.'
        # checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v) for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)
    else:
        # dump the initial (meta) model so later runs can start from the same weights;
        # note: the original derived this path from opt.train_from, which is empty in
        # this branch, so the snapshot is saved under opt.model_path instead
        torch.save(
            model.state_dict(),
            open(os.path.join(opt.model_path, 'initial.model'), 'wb'))

    utils.tally_parameters(model)

    return model
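
When opt.train_from is not set, Example #2 (and the near-identical #3 and #4 below) dumps the freshly initialized weights to an 'initial.model' snapshot so a later run can be compared against, or restarted from, the same starting point. A self-contained sketch of that dump-and-restore pattern; the file name and the model are placeholders:

import torch
import torch.nn as nn

model = nn.Linear(16, 4)  # stand-in for the Seq2Seq model

# Dump the untrained ("meta") weights once, before training starts.
torch.save(model.state_dict(), 'initial.model')

# ... training happens here ...

# Restore the initial state at any later point.
model.load_state_dict(torch.load('initial.model',
                                 map_location=lambda storage, loc: storage))
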
Example #3
def init_model(opt):
    logging.info(
        '======================  Model Parameters  =========================')

    if opt.cascading_model:
        model = Seq2SeqLSTMAttentionCascading(opt)
    else:
        model = Seq2SeqLSTMAttention(opt)

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)

        # load the saved meta-model and override the current one
        # model = torch.load(
        #     open(os.path.join(opt.model_path, opt.exp, '.initial.model'), 'wb')
        # )

        if torch.cuda.is_available() and opt.use_gpu:
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        # compatibility fix: checkpoints saved from nn.DataParallel models
        # prefix their keys with 'module.'
        checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v)
                           for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)
    else:
        # dump the initial (meta) model; the original derived this path from
        # opt.train_from, which is empty in this branch, so save under opt.model_path
        meta_model_path = os.path.join(opt.model_path, 'initial.model')
        torch.save(model.state_dict(), open(meta_model_path, 'wb'))

    if torch.cuda.is_available() and opt.use_gpu:
        model = model.cuda()

    utils.tally_parameters(model)
    # embedding = torch.load('embedding40004.pt')
    # model.init_embedding(embedding,requires_grad=False)

    return model
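
The 'module.' stripping in the examples above and below handles checkpoints that were saved from a model wrapped in nn.DataParallel, whose state_dict keys carry a 'module.' prefix. A minimal self-contained illustration with a placeholder model:

import torch.nn as nn

model = nn.Linear(8, 2)

# Saving through DataParallel prefixes every key with 'module.'.
wrapped = nn.DataParallel(model)
checkpoint = wrapped.state_dict()
print(list(checkpoint.keys()))  # ['module.weight', 'module.bias']

# Strip the prefix so the plain (unwrapped) model can load the weights.
checkpoint = {k[len('module.'):] if k.startswith('module.') else k: v
              for k, v in checkpoint.items()}
model.load_state_dict(checkpoint)
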
Example #4
def init_model(opt):
    logging.info(
        '======================  Model Parameters  =========================')

    if opt.cascading_model:
        model = Seq2SeqLSTMAttentionCascading(opt)
    else:
        model = Seq2SeqLSTMAttention(opt)
    # opt.train_from ="/home/wangxingpeng/en/keyphrase/model/kp20k.ml.copy.bi-directional.test_sgd_0.01/kp20k.ml.copy.bi-directional.epoch=1.batch=3700.total_batch=3700.model"
    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)

        # load the saved meta-model and override the current one
        # model = torch.load(
        #     open(os.path.join(opt.model_path, opt.exp, '.initial.model'), 'wb')
        # )

        if torch.cuda.is_available() and opt.use_gpu:
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        # compatibility fix: checkpoints saved from nn.DataParallel models
        # prefix their keys with 'module.'
        checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v)
                           for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)
    else:
        # dump the initial (meta) model; the original derived this path from
        # opt.train_from, which is empty in this branch, so save under opt.model_path
        meta_model_path = os.path.join(opt.model_path, 'initial.model')
        torch.save(model.state_dict(), open(meta_model_path, 'wb'))

    if torch.cuda.is_available() and opt.use_gpu:
        model = model.cuda()

    utils.tally_parameters(model)

    return model
Example #5
def init_model(opt):
    logging.info(
        '======================  Model Parameters  =========================')

    if not opt.copy_model:
        logging.info('Train a normal seq2seq model')
        model = Seq2SeqLSTMAttention(
            emb_dim=opt.word_vec_size,
            vocab_size=opt.vocab_size,
            src_hidden_dim=opt.rnn_size,
            trg_hidden_dim=opt.rnn_size,
            ctx_hidden_dim=opt.rnn_size,
            attention_mode='dot',
            batch_size=opt.batch_size,
            bidirectional=opt.bidirectional,
            pad_token_src=opt.word2id[pykp.io.PAD_WORD],
            pad_token_trg=opt.word2id[pykp.io.PAD_WORD],
            nlayers_src=opt.enc_layers,
            nlayers_trg=opt.dec_layers,
            dropout=opt.dropout,
            must_teacher_forcing=opt.must_teacher_forcing,
            teacher_forcing_ratio=opt.teacher_forcing_ratio,
            scheduled_sampling=opt.scheduled_sampling,
            scheduled_sampling_batches=opt.scheduled_sampling_batches,
        )
    else:
        logging.info('Train a seq2seq model with copy mechanism')
        model = Seq2SeqLSTMAttentionCopy(
            emb_dim=opt.word_vec_size,
            vocab_size=opt.vocab_size,
            src_hidden_dim=opt.rnn_size,
            trg_hidden_dim=opt.rnn_size,
            ctx_hidden_dim=opt.rnn_size,
            attention_mode='dot',
            batch_size=opt.batch_size,
            bidirectional=opt.bidirectional,
            pad_token_src=opt.word2id[pykp.io.PAD_WORD],
            pad_token_trg=opt.word2id[pykp.io.PAD_WORD],
            nlayers_src=opt.enc_layers,
            nlayers_trg=opt.dec_layers,
            dropout=opt.dropout,
            must_teacher_forcing=opt.must_teacher_forcing,
            teacher_forcing_ratio=opt.teacher_forcing_ratio,
            scheduled_sampling=opt.scheduled_sampling,
            scheduled_sampling_batches=opt.scheduled_sampling_batches,
            unk_word=opt.word2id[pykp.io.UNK_WORD],
        )

    if torch.cuda.is_available():
        model = model.cuda()

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        if torch.cuda.is_available():
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        print(checkpoint.keys())
        # compatibility fix: checkpoints saved from nn.DataParallel models
        # prefix their keys with 'module.'
        checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v)
                           for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)

    utils.tally_parameters(model)

    return model
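
Every example ends with utils.tally_parameters(model). That helper belongs to the surrounding repo and is not shown here; assuming it reports parameter counts, a self-contained approximation in plain PyTorch looks like this (a sketch, not the repo's implementation):

import torch.nn as nn

def tally_parameters(model: nn.Module) -> None:
    # Report total and trainable parameter counts.
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('* number of parameters: %d (trainable: %d)' % (total, trainable))

tally_parameters(nn.Linear(16, 4))  # 68 parameters in total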