Example #1
0
# Model construction (training)
####################################################################################

# Sizes and lookup table read from the training dataset.
vocab_size = dataset.vocab_size
ques_length = dataset.ques_length
ans_length = dataset.ans_length + 1  # one more than the dataset's answer length
his_length = dataset.ques_length + dataset.ans_length
itow = dataset.itow
img_feat_size = opt.conv_feat_size

# Networks are instantiated in a fixed order: netE, netW, netG.
netE = _netE(
    opt.model, opt.ninp, opt.nhid, opt.nlayers, opt.dropout, img_feat_size
)

netW = model._netW(vocab_size, opt.ninp, opt.dropout)
netG = _netG(
    opt.model, vocab_size, opt.ninp, opt.nhid, opt.nlayers, opt.dropout
)
critG = model.LMCriterion()
sampler = model.gumbel_sampler()

# Move every module to the GPU when requested.
if opt.cuda:
    for _module in (netW, netE, netG, critG, sampler):
        _module.cuda()

# Restore weights from `checkpoint` (defined outside this section) whenever
# a model path was supplied.
if opt.model_path != '':
    for _key, _net in (('netW', netW), ('netE', netE), ('netG', netG)):
        _net.load_state_dict(checkpoint[_key])
# training function
def train(epoch):
# Model construction (validation dataset)
####################################################################################
# Sizes and lookup table read from the validation dataset.
vocab_size = dataset_val.vocab_size
ques_length = dataset_val.ques_length
ans_length = dataset_val.ans_length + 1  # one more than the dataset's answer length
his_length = dataset_val.ans_length + dataset_val.ques_length
itow = dataset_val.itow
img_feat_size = 512

print('init Generative model...')
netG = _netG(
    opt.model, vocab_size, opt.ninp, opt.nhid, opt.nlayers, opt.dropout, False
)
netE_g = _netE(
    opt.model, opt.ninp, opt.nhid, opt.nlayers, opt.dropout, img_feat_size
)
netW_g = model._netW(vocab_size, opt.ninp, opt.dropout)
sampler = model.gumbel_sampler()
critG = model.G_loss(opt.ninp)
critLM = model.LMCriterion()

# The checkpoint stores the embedding/encoder weights under different keys
# depending on `opt.evalall`; netG always lives under 'netG'.
# The explicit `== False` comparison is kept on purpose: it treats None
# differently from a plain truthiness test.
if opt.evalall == False:
    _netW_key, _netE_key = 'netW', 'netE'
else:
    _netW_key, _netE_key = 'netW_g', 'netE_g'

print('Loading Generative model...')
netW_g.load_state_dict(checkpoint[_netW_key])
netE_g.load_state_dict(checkpoint[_netE_key])
netG.load_state_dict(checkpoint['netG'])
if opt.cuda:  # ship to cuda, if has GPU
def bulid_model_D(opt, src_vocab_size, tgt_vocab_size):
    """Build the networks, criteria and samplers for the ``D`` model.

    Creates the ``opt.save_model`` directory (best effort), constructs the
    source/target word-embedding and attention-encoder networks, optionally
    restores their weights from the checkpoint at ``opt.model_path_D``, and
    moves every module to the GPU when ``opt.cuda`` is set.

    Args:
        opt: Options object. This function reads ``save_model``,
            ``model_path_D``, ``LM_lr``, ``rnn_size``, ``dropout``,
            ``margin`` and ``cuda``.
        src_vocab_size: Vocabulary size passed to the source-side networks.
        tgt_vocab_size: Vocabulary size passed to the target-side networks.

    Returns:
        The 10-tuple ``(tgt_netW, src_netW, src_netE_att, tgt_netE_att,
        critD, lr, sampler, critG, critLM, BLEU_score)``. ``lr`` is the
        checkpoint's ``'lr'`` entry when a checkpoint was loaded, otherwise
        ``opt.LM_lr``.
    """
    save_path = opt.save_model
    print("save_path", save_path)
    try:
        os.makedirs(save_path)
    except OSError:
        # Deliberately ignored (e.g. the directory already exists).
        pass

    # Read the checkpoint before building the networks so that a bad path
    # fails early; None means "train from scratch".
    checkpoint_D = None
    if opt.model_path_D != '':
        print("=> loading checkpoint '{}'".format(opt.model_path_D))
        # NOTE(security): torch.load unpickles the file; only point
        # opt.model_path_D at checkpoints from a trusted source.
        # Tensors are mapped onto the CPU so a checkpoint written on a GPU
        # can be restored on a CPU-only host. The modules are still on the
        # CPU when load_state_dict runs and are moved to the GPU afterwards.
        checkpoint_D = torch.load(
            opt.model_path_D, map_location=lambda storage, loc: storage
        )

    lr = opt.LM_lr
    print("rnn size", opt.rnn_size)

    # Construction order is kept fixed so parameter initialisation draws
    # from the random generator in the same sequence on every run.
    src_netE_att = _netE_att(opt.rnn_size, src_vocab_size, opt.dropout)
    src_netW = _netW(src_vocab_size, opt.rnn_size, opt.dropout, name="src", cuda=opt.cuda)
    print("src_netW", src_vocab_size, src_netW.word_embed)
    tgt_netW = _netW(tgt_vocab_size, opt.rnn_size, opt.dropout, name="tgt", cuda=opt.cuda)
    print("tgt_netW", tgt_vocab_size, tgt_netW.word_embed)
    tgt_netE_att = _netE_att(opt.rnn_size, tgt_vocab_size, opt.dropout, is_target=True)

    critD = model.nPairLoss(opt.rnn_size, opt.margin)

    if checkpoint_D is not None:
        # Restore the pre-trained weights and the learning rate saved with them.
        src_netW.load_state_dict(checkpoint_D['src_netW'])
        tgt_netW.load_state_dict(checkpoint_D['tgt_netW'])
        src_netE_att.load_state_dict(checkpoint_D['src_netE_att'])
        tgt_netE_att.load_state_dict(checkpoint_D['tgt_netE_att'])
        lr = checkpoint_D['lr']

    if opt.cuda:
        for module in (tgt_netW, src_netW, src_netE_att, tgt_netE_att, critD):
            module.cuda()

    print('init Generative model...')

    sampler = model.gumbel_sampler()
    critG = model.G_loss(opt.rnn_size)
    critLM = model.LMCriterion()
    BLEU_score = model.BLEU_score()

    if opt.cuda:  # ship to cuda, if has GPU
        for module in (sampler, critG, critLM, BLEU_score):
            module.cuda()

    print("load netD successfully")
    return (tgt_netW, src_netW, src_netE_att, tgt_netE_att, critD, lr,
            sampler, critG, critLM, BLEU_score)